diff options
Diffstat (limited to 'thirdparty/mbedtls/library/sha512.c')
-rw-r--r-- | thirdparty/mbedtls/library/sha512.c | 753 |
1 files changed, 613 insertions, 140 deletions
diff --git a/thirdparty/mbedtls/library/sha512.c b/thirdparty/mbedtls/library/sha512.c index 77bdc2ec23..6dcea8da5d 100644 --- a/thirdparty/mbedtls/library/sha512.c +++ b/thirdparty/mbedtls/library/sha512.c @@ -10,9 +10,25 @@ * http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf */ +#if defined(__aarch64__) && !defined(__ARM_FEATURE_SHA512) && \ + defined(__clang__) && __clang_major__ >= 7 +/* TODO: Re-consider above after https://reviews.llvm.org/D131064 merged. + * + * The intrinsic declaration are guarded by predefined ACLE macros in clang: + * these are normally only enabled by the -march option on the command line. + * By defining the macros ourselves we gain access to those declarations without + * requiring -march on the command line. + * + * `arm_neon.h` is included by common.h, so we put these defines + * at the top of this file, before any includes. + */ +#define __ARM_FEATURE_SHA512 1 +#define MBEDTLS_ENABLE_ARM_SHA3_EXTENSIONS_COMPILER_FLAG +#endif + #include "common.h" -#if defined(MBEDTLS_SHA512_C) +#if defined(MBEDTLS_SHA512_C) || defined(MBEDTLS_SHA384_C) #include "mbedtls/sha512.h" #include "mbedtls/platform_util.h" @@ -28,12 +44,171 @@ #include "mbedtls/platform.h" -#define SHA512_VALIDATE_RET(cond) \ - MBEDTLS_INTERNAL_VALIDATE_RET(cond, MBEDTLS_ERR_SHA512_BAD_INPUT_DATA) -#define SHA512_VALIDATE(cond) MBEDTLS_INTERNAL_VALIDATE(cond) +#if defined(__aarch64__) +# if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT) || \ + defined(MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY) +/* *INDENT-OFF* */ +# if !defined(MBEDTLS_HAVE_NEON_INTRINSICS) +# error "Target does not support NEON instructions" +# endif +/* + * Best performance comes from most recent compilers, with intrinsics and -O3. + * Must compile with -march=armv8.2-a+sha3, but we can't detect armv8.2-a, and + * can't always detect __ARM_FEATURE_SHA512 (notably clang 7-12). + * + * GCC < 8 won't work at all (lacks the sha512 instructions) + * GCC >= 8 uses intrinsics, sets __ARM_FEATURE_SHA512 + * + * Clang < 7 won't work at all (lacks the sha512 instructions) + * Clang 7-12 don't have intrinsics (but we work around that with inline + * assembler) or __ARM_FEATURE_SHA512 + * Clang == 13.0.0 same as clang 12 (only seen on macOS) + * Clang >= 13.0.1 has __ARM_FEATURE_SHA512 and intrinsics + */ +# if !defined(__ARM_FEATURE_SHA512) || defined(MBEDTLS_ENABLE_ARM_SHA3_EXTENSIONS_COMPILER_FLAG) + /* Test Clang first, as it defines __GNUC__ */ +# if defined(__ARMCOMPILER_VERSION) +# if __ARMCOMPILER_VERSION < 6090000 +# error "A more recent armclang is required for MBEDTLS_SHA512_USE_A64_CRYPTO_*" +# elif __ARMCOMPILER_VERSION == 6090000 +# error "Must use minimum -march=armv8.2-a+sha3 for MBEDTLS_SHA512_USE_A64_CRYPTO_*" +# else +# pragma clang attribute push (__attribute__((target("sha3"))), apply_to=function) +# define MBEDTLS_POP_TARGET_PRAGMA +# endif +# elif defined(__clang__) +# if __clang_major__ < 7 +# error "A more recent Clang is required for MBEDTLS_SHA512_USE_A64_CRYPTO_*" +# else +# pragma clang attribute push (__attribute__((target("sha3"))), apply_to=function) +# define MBEDTLS_POP_TARGET_PRAGMA +# endif +# elif defined(__GNUC__) +# if __GNUC__ < 8 +# error "A more recent GCC is required for MBEDTLS_SHA512_USE_A64_CRYPTO_*" +# else +# pragma GCC push_options +# pragma GCC target ("arch=armv8.2-a+sha3") +# define MBEDTLS_POP_TARGET_PRAGMA +# endif +# else +# error "Only GCC and Clang supported for MBEDTLS_SHA512_USE_A64_CRYPTO_*" +# endif +# endif +/* *INDENT-ON* */ +# endif +# if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT) +# if defined(__unix__) +# if defined(__linux__) +/* Our preferred method of detection is getauxval() */ +# include <sys/auxv.h> +# if !defined(HWCAP_SHA512) +/* The same header that declares getauxval() should provide the HWCAP_xxx + * constants to analyze its return value. However, the libc may be too + * old to have the constant that we need. So if it's missing, assume that + * the value is the same one used by the Linux kernel ABI. + */ +# define HWCAP_SHA512 (1 << 21) +# endif +# endif +/* Use SIGILL on Unix, and fall back to it on Linux */ +# include <signal.h> +# endif +# endif +#elif !defined(MBEDTLS_PLATFORM_IS_WINDOWS_ON_ARM64) +# undef MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY +# undef MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT +#endif + +#if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT) +/* + * Capability detection code comes early, so we can disable + * MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT if no detection mechanism found + */ +#if defined(HWCAP_SHA512) +static int mbedtls_a64_crypto_sha512_determine_support(void) +{ + return (getauxval(AT_HWCAP) & HWCAP_SHA512) ? 1 : 0; +} +#elif defined(__APPLE__) +#include <sys/types.h> +#include <sys/sysctl.h> + +static int mbedtls_a64_crypto_sha512_determine_support(void) +{ + int value = 0; + size_t value_len = sizeof(value); + + int ret = sysctlbyname("hw.optional.armv8_2_sha512", &value, &value_len, + NULL, 0); + return ret == 0 && value != 0; +} +#elif defined(MBEDTLS_PLATFORM_IS_WINDOWS_ON_ARM64) +/* + * As of March 2022, there don't appear to be any PF_ARM_V8_* flags + * available to pass to IsProcessorFeaturePresent() to check for + * SHA-512 support. So we fall back to the C code only. + */ +#if defined(_MSC_VER) +#pragma message "No mechanism to detect A64_CRYPTO found, using C code only" +#else +#warning "No mechanism to detect A64_CRYPTO found, using C code only" +#endif +#elif defined(__unix__) && defined(SIG_SETMASK) +/* Detection with SIGILL, setjmp() and longjmp() */ +#include <signal.h> +#include <setjmp.h> + +static jmp_buf return_from_sigill; + +/* + * A64 SHA512 support detection via SIGILL + */ +static void sigill_handler(int signal) +{ + (void) signal; + longjmp(return_from_sigill, 1); +} + +static int mbedtls_a64_crypto_sha512_determine_support(void) +{ + struct sigaction old_action, new_action; + + sigset_t old_mask; + if (sigprocmask(0, NULL, &old_mask)) { + return 0; + } + + sigemptyset(&new_action.sa_mask); + new_action.sa_flags = 0; + new_action.sa_handler = sigill_handler; + + sigaction(SIGILL, &new_action, &old_action); + + static int ret = 0; + + if (setjmp(return_from_sigill) == 0) { /* First return only */ + /* If this traps, we will return a second time from setjmp() with 1 */ + asm ("sha512h q0, q0, v0.2d" : : : "v0"); + ret = 1; + } + + sigaction(SIGILL, &old_action, NULL); + sigprocmask(SIG_SETMASK, &old_mask, NULL); + + return ret; +} +#else +#warning "No mechanism to detect A64_CRYPTO found, using C code only" +#undef MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT +#endif /* HWCAP_SHA512, __APPLE__, __unix__ && SIG_SETMASK */ + +#endif /* MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT */ #if !defined(MBEDTLS_SHA512_ALT) +#define SHA512_BLOCK_SIZE 128 + #if defined(MBEDTLS_SHA512_SMALLER) static void sha512_put_uint64_be(uint64_t n, unsigned char *b, uint8_t i) { @@ -45,8 +220,6 @@ static void sha512_put_uint64_be(uint64_t n, unsigned char *b, uint8_t i) void mbedtls_sha512_init(mbedtls_sha512_context *ctx) { - SHA512_VALIDATE(ctx != NULL); - memset(ctx, 0, sizeof(mbedtls_sha512_context)); } @@ -62,29 +235,33 @@ void mbedtls_sha512_free(mbedtls_sha512_context *ctx) void mbedtls_sha512_clone(mbedtls_sha512_context *dst, const mbedtls_sha512_context *src) { - SHA512_VALIDATE(dst != NULL); - SHA512_VALIDATE(src != NULL); - *dst = *src; } /* * SHA-512 context setup */ -int mbedtls_sha512_starts_ret(mbedtls_sha512_context *ctx, int is384) +int mbedtls_sha512_starts(mbedtls_sha512_context *ctx, int is384) { - SHA512_VALIDATE_RET(ctx != NULL); -#if !defined(MBEDTLS_SHA512_NO_SHA384) - SHA512_VALIDATE_RET(is384 == 0 || is384 == 1); -#else - SHA512_VALIDATE_RET(is384 == 0); +#if defined(MBEDTLS_SHA384_C) && defined(MBEDTLS_SHA512_C) + if (is384 != 0 && is384 != 1) { + return MBEDTLS_ERR_SHA512_BAD_INPUT_DATA; + } +#elif defined(MBEDTLS_SHA512_C) + if (is384 != 0) { + return MBEDTLS_ERR_SHA512_BAD_INPUT_DATA; + } +#else /* defined MBEDTLS_SHA384_C only */ + if (is384 == 0) { + return MBEDTLS_ERR_SHA512_BAD_INPUT_DATA; + } #endif ctx->total[0] = 0; ctx->total[1] = 0; if (is384 == 0) { - /* SHA-512 */ +#if defined(MBEDTLS_SHA512_C) ctx->state[0] = UL64(0x6A09E667F3BCC908); ctx->state[1] = UL64(0xBB67AE8584CAA73B); ctx->state[2] = UL64(0x3C6EF372FE94F82B); @@ -93,11 +270,9 @@ int mbedtls_sha512_starts_ret(mbedtls_sha512_context *ctx, int is384) ctx->state[5] = UL64(0x9B05688C2B3E6C1F); ctx->state[6] = UL64(0x1F83D9ABFB41BD6B); ctx->state[7] = UL64(0x5BE0CD19137E2179); +#endif /* MBEDTLS_SHA512_C */ } else { -#if defined(MBEDTLS_SHA512_NO_SHA384) - return MBEDTLS_ERR_SHA512_BAD_INPUT_DATA; -#else - /* SHA-384 */ +#if defined(MBEDTLS_SHA384_C) ctx->state[0] = UL64(0xCBBB9D5DC1059ED8); ctx->state[1] = UL64(0x629A292A367CD507); ctx->state[2] = UL64(0x9159015A3070DD17); @@ -106,24 +281,16 @@ int mbedtls_sha512_starts_ret(mbedtls_sha512_context *ctx, int is384) ctx->state[5] = UL64(0x8EB44A8768581511); ctx->state[6] = UL64(0xDB0C2E0D64F98FA7); ctx->state[7] = UL64(0x47B5481DBEFA4FA4); -#endif /* MBEDTLS_SHA512_NO_SHA384 */ +#endif /* MBEDTLS_SHA384_C */ } -#if !defined(MBEDTLS_SHA512_NO_SHA384) +#if defined(MBEDTLS_SHA384_C) ctx->is384 = is384; #endif return 0; } -#if !defined(MBEDTLS_DEPRECATED_REMOVED) -void mbedtls_sha512_starts(mbedtls_sha512_context *ctx, - int is384) -{ - mbedtls_sha512_starts_ret(ctx, is384); -} -#endif - #if !defined(MBEDTLS_SHA512_PROCESS_ALT) /* @@ -172,9 +339,267 @@ static const uint64_t K[80] = UL64(0x4CC5D4BECB3E42B6), UL64(0x597F299CFC657E2A), UL64(0x5FCB6FAB3AD6FAEC), UL64(0x6C44198C4A475817) }; +#endif -int mbedtls_internal_sha512_process(mbedtls_sha512_context *ctx, - const unsigned char data[128]) +#if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT) || \ + defined(MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY) + +#if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY) +# define mbedtls_internal_sha512_process_many_a64_crypto mbedtls_internal_sha512_process_many +# define mbedtls_internal_sha512_process_a64_crypto mbedtls_internal_sha512_process +#endif + +/* Accelerated SHA-512 implementation originally written by Simon Tatham for PuTTY, + * under the MIT licence; dual-licensed as Apache 2 with his kind permission. + */ + +#if defined(__clang__) && \ + (__clang_major__ < 13 || \ + (__clang_major__ == 13 && __clang_minor__ == 0 && __clang_patchlevel__ == 0)) +static inline uint64x2_t vsha512su0q_u64(uint64x2_t x, uint64x2_t y) +{ + asm ("sha512su0 %0.2D,%1.2D" : "+w" (x) : "w" (y)); + return x; +} +static inline uint64x2_t vsha512su1q_u64(uint64x2_t x, uint64x2_t y, uint64x2_t z) +{ + asm ("sha512su1 %0.2D,%1.2D,%2.2D" : "+w" (x) : "w" (y), "w" (z)); + return x; +} +static inline uint64x2_t vsha512hq_u64(uint64x2_t x, uint64x2_t y, uint64x2_t z) +{ + asm ("sha512h %0,%1,%2.2D" : "+w" (x) : "w" (y), "w" (z)); + return x; +} +static inline uint64x2_t vsha512h2q_u64(uint64x2_t x, uint64x2_t y, uint64x2_t z) +{ + asm ("sha512h2 %0,%1,%2.2D" : "+w" (x) : "w" (y), "w" (z)); + return x; +} +#endif /* __clang__ etc */ + +static size_t mbedtls_internal_sha512_process_many_a64_crypto( + mbedtls_sha512_context *ctx, const uint8_t *msg, size_t len) +{ + uint64x2_t ab = vld1q_u64(&ctx->state[0]); + uint64x2_t cd = vld1q_u64(&ctx->state[2]); + uint64x2_t ef = vld1q_u64(&ctx->state[4]); + uint64x2_t gh = vld1q_u64(&ctx->state[6]); + + size_t processed = 0; + + for (; + len >= SHA512_BLOCK_SIZE; + processed += SHA512_BLOCK_SIZE, + msg += SHA512_BLOCK_SIZE, + len -= SHA512_BLOCK_SIZE) { + uint64x2_t initial_sum, sum, intermed; + + uint64x2_t ab_orig = ab; + uint64x2_t cd_orig = cd; + uint64x2_t ef_orig = ef; + uint64x2_t gh_orig = gh; + + uint64x2_t s0 = (uint64x2_t) vld1q_u8(msg + 16 * 0); + uint64x2_t s1 = (uint64x2_t) vld1q_u8(msg + 16 * 1); + uint64x2_t s2 = (uint64x2_t) vld1q_u8(msg + 16 * 2); + uint64x2_t s3 = (uint64x2_t) vld1q_u8(msg + 16 * 3); + uint64x2_t s4 = (uint64x2_t) vld1q_u8(msg + 16 * 4); + uint64x2_t s5 = (uint64x2_t) vld1q_u8(msg + 16 * 5); + uint64x2_t s6 = (uint64x2_t) vld1q_u8(msg + 16 * 6); + uint64x2_t s7 = (uint64x2_t) vld1q_u8(msg + 16 * 7); + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ /* assume LE if these not defined; untested on BE */ + s0 = vreinterpretq_u64_u8(vrev64q_u8(vreinterpretq_u8_u64(s0))); + s1 = vreinterpretq_u64_u8(vrev64q_u8(vreinterpretq_u8_u64(s1))); + s2 = vreinterpretq_u64_u8(vrev64q_u8(vreinterpretq_u8_u64(s2))); + s3 = vreinterpretq_u64_u8(vrev64q_u8(vreinterpretq_u8_u64(s3))); + s4 = vreinterpretq_u64_u8(vrev64q_u8(vreinterpretq_u8_u64(s4))); + s5 = vreinterpretq_u64_u8(vrev64q_u8(vreinterpretq_u8_u64(s5))); + s6 = vreinterpretq_u64_u8(vrev64q_u8(vreinterpretq_u8_u64(s6))); + s7 = vreinterpretq_u64_u8(vrev64q_u8(vreinterpretq_u8_u64(s7))); +#endif + + /* Rounds 0 and 1 */ + initial_sum = vaddq_u64(s0, vld1q_u64(&K[0])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), gh); + intermed = vsha512hq_u64(sum, vextq_u64(ef, gh, 1), vextq_u64(cd, ef, 1)); + gh = vsha512h2q_u64(intermed, cd, ab); + cd = vaddq_u64(cd, intermed); + + /* Rounds 2 and 3 */ + initial_sum = vaddq_u64(s1, vld1q_u64(&K[2])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ef); + intermed = vsha512hq_u64(sum, vextq_u64(cd, ef, 1), vextq_u64(ab, cd, 1)); + ef = vsha512h2q_u64(intermed, ab, gh); + ab = vaddq_u64(ab, intermed); + + /* Rounds 4 and 5 */ + initial_sum = vaddq_u64(s2, vld1q_u64(&K[4])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), cd); + intermed = vsha512hq_u64(sum, vextq_u64(ab, cd, 1), vextq_u64(gh, ab, 1)); + cd = vsha512h2q_u64(intermed, gh, ef); + gh = vaddq_u64(gh, intermed); + + /* Rounds 6 and 7 */ + initial_sum = vaddq_u64(s3, vld1q_u64(&K[6])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ab); + intermed = vsha512hq_u64(sum, vextq_u64(gh, ab, 1), vextq_u64(ef, gh, 1)); + ab = vsha512h2q_u64(intermed, ef, cd); + ef = vaddq_u64(ef, intermed); + + /* Rounds 8 and 9 */ + initial_sum = vaddq_u64(s4, vld1q_u64(&K[8])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), gh); + intermed = vsha512hq_u64(sum, vextq_u64(ef, gh, 1), vextq_u64(cd, ef, 1)); + gh = vsha512h2q_u64(intermed, cd, ab); + cd = vaddq_u64(cd, intermed); + + /* Rounds 10 and 11 */ + initial_sum = vaddq_u64(s5, vld1q_u64(&K[10])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ef); + intermed = vsha512hq_u64(sum, vextq_u64(cd, ef, 1), vextq_u64(ab, cd, 1)); + ef = vsha512h2q_u64(intermed, ab, gh); + ab = vaddq_u64(ab, intermed); + + /* Rounds 12 and 13 */ + initial_sum = vaddq_u64(s6, vld1q_u64(&K[12])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), cd); + intermed = vsha512hq_u64(sum, vextq_u64(ab, cd, 1), vextq_u64(gh, ab, 1)); + cd = vsha512h2q_u64(intermed, gh, ef); + gh = vaddq_u64(gh, intermed); + + /* Rounds 14 and 15 */ + initial_sum = vaddq_u64(s7, vld1q_u64(&K[14])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ab); + intermed = vsha512hq_u64(sum, vextq_u64(gh, ab, 1), vextq_u64(ef, gh, 1)); + ab = vsha512h2q_u64(intermed, ef, cd); + ef = vaddq_u64(ef, intermed); + + for (unsigned int t = 16; t < 80; t += 16) { + /* Rounds t and t + 1 */ + s0 = vsha512su1q_u64(vsha512su0q_u64(s0, s1), s7, vextq_u64(s4, s5, 1)); + initial_sum = vaddq_u64(s0, vld1q_u64(&K[t])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), gh); + intermed = vsha512hq_u64(sum, vextq_u64(ef, gh, 1), vextq_u64(cd, ef, 1)); + gh = vsha512h2q_u64(intermed, cd, ab); + cd = vaddq_u64(cd, intermed); + + /* Rounds t + 2 and t + 3 */ + s1 = vsha512su1q_u64(vsha512su0q_u64(s1, s2), s0, vextq_u64(s5, s6, 1)); + initial_sum = vaddq_u64(s1, vld1q_u64(&K[t + 2])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ef); + intermed = vsha512hq_u64(sum, vextq_u64(cd, ef, 1), vextq_u64(ab, cd, 1)); + ef = vsha512h2q_u64(intermed, ab, gh); + ab = vaddq_u64(ab, intermed); + + /* Rounds t + 4 and t + 5 */ + s2 = vsha512su1q_u64(vsha512su0q_u64(s2, s3), s1, vextq_u64(s6, s7, 1)); + initial_sum = vaddq_u64(s2, vld1q_u64(&K[t + 4])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), cd); + intermed = vsha512hq_u64(sum, vextq_u64(ab, cd, 1), vextq_u64(gh, ab, 1)); + cd = vsha512h2q_u64(intermed, gh, ef); + gh = vaddq_u64(gh, intermed); + + /* Rounds t + 6 and t + 7 */ + s3 = vsha512su1q_u64(vsha512su0q_u64(s3, s4), s2, vextq_u64(s7, s0, 1)); + initial_sum = vaddq_u64(s3, vld1q_u64(&K[t + 6])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ab); + intermed = vsha512hq_u64(sum, vextq_u64(gh, ab, 1), vextq_u64(ef, gh, 1)); + ab = vsha512h2q_u64(intermed, ef, cd); + ef = vaddq_u64(ef, intermed); + + /* Rounds t + 8 and t + 9 */ + s4 = vsha512su1q_u64(vsha512su0q_u64(s4, s5), s3, vextq_u64(s0, s1, 1)); + initial_sum = vaddq_u64(s4, vld1q_u64(&K[t + 8])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), gh); + intermed = vsha512hq_u64(sum, vextq_u64(ef, gh, 1), vextq_u64(cd, ef, 1)); + gh = vsha512h2q_u64(intermed, cd, ab); + cd = vaddq_u64(cd, intermed); + + /* Rounds t + 10 and t + 11 */ + s5 = vsha512su1q_u64(vsha512su0q_u64(s5, s6), s4, vextq_u64(s1, s2, 1)); + initial_sum = vaddq_u64(s5, vld1q_u64(&K[t + 10])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ef); + intermed = vsha512hq_u64(sum, vextq_u64(cd, ef, 1), vextq_u64(ab, cd, 1)); + ef = vsha512h2q_u64(intermed, ab, gh); + ab = vaddq_u64(ab, intermed); + + /* Rounds t + 12 and t + 13 */ + s6 = vsha512su1q_u64(vsha512su0q_u64(s6, s7), s5, vextq_u64(s2, s3, 1)); + initial_sum = vaddq_u64(s6, vld1q_u64(&K[t + 12])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), cd); + intermed = vsha512hq_u64(sum, vextq_u64(ab, cd, 1), vextq_u64(gh, ab, 1)); + cd = vsha512h2q_u64(intermed, gh, ef); + gh = vaddq_u64(gh, intermed); + + /* Rounds t + 14 and t + 15 */ + s7 = vsha512su1q_u64(vsha512su0q_u64(s7, s0), s6, vextq_u64(s3, s4, 1)); + initial_sum = vaddq_u64(s7, vld1q_u64(&K[t + 14])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ab); + intermed = vsha512hq_u64(sum, vextq_u64(gh, ab, 1), vextq_u64(ef, gh, 1)); + ab = vsha512h2q_u64(intermed, ef, cd); + ef = vaddq_u64(ef, intermed); + } + + ab = vaddq_u64(ab, ab_orig); + cd = vaddq_u64(cd, cd_orig); + ef = vaddq_u64(ef, ef_orig); + gh = vaddq_u64(gh, gh_orig); + } + + vst1q_u64(&ctx->state[0], ab); + vst1q_u64(&ctx->state[2], cd); + vst1q_u64(&ctx->state[4], ef); + vst1q_u64(&ctx->state[6], gh); + + return processed; +} + +#if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT) +/* + * This function is for internal use only if we are building both C and A64 + * versions, otherwise it is renamed to be the public mbedtls_internal_sha512_process() + */ +static +#endif +int mbedtls_internal_sha512_process_a64_crypto(mbedtls_sha512_context *ctx, + const unsigned char data[SHA512_BLOCK_SIZE]) +{ + return (mbedtls_internal_sha512_process_many_a64_crypto(ctx, data, + SHA512_BLOCK_SIZE) == + SHA512_BLOCK_SIZE) ? 0 : -1; +} + +#endif /* MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT || MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY */ + +#if defined(MBEDTLS_POP_TARGET_PRAGMA) +#if defined(__clang__) +#pragma clang attribute pop +#elif defined(__GNUC__) +#pragma GCC pop_options +#endif +#undef MBEDTLS_POP_TARGET_PRAGMA +#endif + + +#if !defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT) +#define mbedtls_internal_sha512_process_many_c mbedtls_internal_sha512_process_many +#define mbedtls_internal_sha512_process_c mbedtls_internal_sha512_process +#endif + + +#if !defined(MBEDTLS_SHA512_PROCESS_ALT) && !defined(MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY) + +#if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT) +/* + * This function is for internal use only if we are building both C and A64 + * versions, otherwise it is renamed to be the public mbedtls_internal_sha512_process() + */ +static +#endif +int mbedtls_internal_sha512_process_c(mbedtls_sha512_context *ctx, + const unsigned char data[SHA512_BLOCK_SIZE]) { int i; struct { @@ -182,9 +607,6 @@ int mbedtls_internal_sha512_process(mbedtls_sha512_context *ctx, uint64_t A[8]; } local; - SHA512_VALIDATE_RET(ctx != NULL); - SHA512_VALIDATE_RET((const unsigned char *) data != NULL); - #define SHR(x, n) ((x) >> (n)) #define ROTR(x, n) (SHR((x), (n)) | ((x) << (64 - (n)))) @@ -268,35 +690,87 @@ int mbedtls_internal_sha512_process(mbedtls_sha512_context *ctx, return 0; } -#if !defined(MBEDTLS_DEPRECATED_REMOVED) -void mbedtls_sha512_process(mbedtls_sha512_context *ctx, - const unsigned char data[128]) +#endif /* !MBEDTLS_SHA512_PROCESS_ALT && !MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY */ + + +#if !defined(MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY) + +static size_t mbedtls_internal_sha512_process_many_c( + mbedtls_sha512_context *ctx, const uint8_t *data, size_t len) { - mbedtls_internal_sha512_process(ctx, data); + size_t processed = 0; + + while (len >= SHA512_BLOCK_SIZE) { + if (mbedtls_internal_sha512_process_c(ctx, data) != 0) { + return 0; + } + + data += SHA512_BLOCK_SIZE; + len -= SHA512_BLOCK_SIZE; + + processed += SHA512_BLOCK_SIZE; + } + + return processed; } -#endif -#endif /* !MBEDTLS_SHA512_PROCESS_ALT */ + +#endif /* !MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY */ + + +#if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT) + +static int mbedtls_a64_crypto_sha512_has_support(void) +{ + static int done = 0; + static int supported = 0; + + if (!done) { + supported = mbedtls_a64_crypto_sha512_determine_support(); + done = 1; + } + + return supported; +} + +static size_t mbedtls_internal_sha512_process_many(mbedtls_sha512_context *ctx, + const uint8_t *msg, size_t len) +{ + if (mbedtls_a64_crypto_sha512_has_support()) { + return mbedtls_internal_sha512_process_many_a64_crypto(ctx, msg, len); + } else { + return mbedtls_internal_sha512_process_many_c(ctx, msg, len); + } +} + +int mbedtls_internal_sha512_process(mbedtls_sha512_context *ctx, + const unsigned char data[SHA512_BLOCK_SIZE]) +{ + if (mbedtls_a64_crypto_sha512_has_support()) { + return mbedtls_internal_sha512_process_a64_crypto(ctx, data); + } else { + return mbedtls_internal_sha512_process_c(ctx, data); + } +} + +#endif /* MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT */ /* * SHA-512 process buffer */ -int mbedtls_sha512_update_ret(mbedtls_sha512_context *ctx, - const unsigned char *input, - size_t ilen) +int mbedtls_sha512_update(mbedtls_sha512_context *ctx, + const unsigned char *input, + size_t ilen) { int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED; size_t fill; unsigned int left; - SHA512_VALIDATE_RET(ctx != NULL); - SHA512_VALIDATE_RET(ilen == 0 || input != NULL); - if (ilen == 0) { return 0; } left = (unsigned int) (ctx->total[0] & 0x7F); - fill = 128 - left; + fill = SHA512_BLOCK_SIZE - left; ctx->total[0] += (uint64_t) ilen; @@ -316,13 +790,15 @@ int mbedtls_sha512_update_ret(mbedtls_sha512_context *ctx, left = 0; } - while (ilen >= 128) { - if ((ret = mbedtls_internal_sha512_process(ctx, input)) != 0) { - return ret; + while (ilen >= SHA512_BLOCK_SIZE) { + size_t processed = + mbedtls_internal_sha512_process_many(ctx, input, ilen); + if (processed < SHA512_BLOCK_SIZE) { + return MBEDTLS_ERR_ERROR_GENERIC_ERROR; } - input += 128; - ilen -= 128; + input += processed; + ilen -= processed; } if (ilen > 0) { @@ -332,27 +808,16 @@ int mbedtls_sha512_update_ret(mbedtls_sha512_context *ctx, return 0; } -#if !defined(MBEDTLS_DEPRECATED_REMOVED) -void mbedtls_sha512_update(mbedtls_sha512_context *ctx, - const unsigned char *input, - size_t ilen) -{ - mbedtls_sha512_update_ret(ctx, input, ilen); -} -#endif - /* * SHA-512 final digest */ -int mbedtls_sha512_finish_ret(mbedtls_sha512_context *ctx, - unsigned char output[64]) +int mbedtls_sha512_finish(mbedtls_sha512_context *ctx, + unsigned char *output) { int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED; unsigned used; uint64_t high, low; - - SHA512_VALIDATE_RET(ctx != NULL); - SHA512_VALIDATE_RET((unsigned char *) output != NULL); + int truncated = 0; /* * Add padding: 0x80 then 0x00 until 16 bytes remain for the length @@ -366,10 +831,10 @@ int mbedtls_sha512_finish_ret(mbedtls_sha512_context *ctx, memset(ctx->buffer + used, 0, 112 - used); } else { /* We'll need an extra block */ - memset(ctx->buffer + used, 0, 128 - used); + memset(ctx->buffer + used, 0, SHA512_BLOCK_SIZE - used); if ((ret = mbedtls_internal_sha512_process(ctx, ctx->buffer)) != 0) { - return ret; + goto exit; } memset(ctx->buffer, 0, 112); @@ -386,7 +851,7 @@ int mbedtls_sha512_finish_ret(mbedtls_sha512_context *ctx, sha512_put_uint64_be(low, ctx->buffer, 120); if ((ret = mbedtls_internal_sha512_process(ctx, ctx->buffer)) != 0) { - return ret; + goto exit; } /* @@ -399,8 +864,7 @@ int mbedtls_sha512_finish_ret(mbedtls_sha512_context *ctx, sha512_put_uint64_be(ctx->state[4], output, 32); sha512_put_uint64_be(ctx->state[5], output, 40); - int truncated = 0; -#if !defined(MBEDTLS_SHA512_NO_SHA384) +#if defined(MBEDTLS_SHA384_C) truncated = ctx->is384; #endif if (!truncated) { @@ -408,49 +872,51 @@ int mbedtls_sha512_finish_ret(mbedtls_sha512_context *ctx, sha512_put_uint64_be(ctx->state[7], output, 56); } - return 0; -} + ret = 0; -#if !defined(MBEDTLS_DEPRECATED_REMOVED) -void mbedtls_sha512_finish(mbedtls_sha512_context *ctx, - unsigned char output[64]) -{ - mbedtls_sha512_finish_ret(ctx, output); +exit: + mbedtls_sha512_free(ctx); + return ret; } -#endif #endif /* !MBEDTLS_SHA512_ALT */ /* * output = SHA-512( input buffer ) */ -int mbedtls_sha512_ret(const unsigned char *input, - size_t ilen, - unsigned char output[64], - int is384) +int mbedtls_sha512(const unsigned char *input, + size_t ilen, + unsigned char *output, + int is384) { int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED; mbedtls_sha512_context ctx; -#if !defined(MBEDTLS_SHA512_NO_SHA384) - SHA512_VALIDATE_RET(is384 == 0 || is384 == 1); -#else - SHA512_VALIDATE_RET(is384 == 0); +#if defined(MBEDTLS_SHA384_C) && defined(MBEDTLS_SHA512_C) + if (is384 != 0 && is384 != 1) { + return MBEDTLS_ERR_SHA512_BAD_INPUT_DATA; + } +#elif defined(MBEDTLS_SHA512_C) + if (is384 != 0) { + return MBEDTLS_ERR_SHA512_BAD_INPUT_DATA; + } +#else /* defined MBEDTLS_SHA384_C only */ + if (is384 == 0) { + return MBEDTLS_ERR_SHA512_BAD_INPUT_DATA; + } #endif - SHA512_VALIDATE_RET(ilen == 0 || input != NULL); - SHA512_VALIDATE_RET((unsigned char *) output != NULL); mbedtls_sha512_init(&ctx); - if ((ret = mbedtls_sha512_starts_ret(&ctx, is384)) != 0) { + if ((ret = mbedtls_sha512_starts(&ctx, is384)) != 0) { goto exit; } - if ((ret = mbedtls_sha512_update_ret(&ctx, input, ilen)) != 0) { + if ((ret = mbedtls_sha512_update(&ctx, input, ilen)) != 0) { goto exit; } - if ((ret = mbedtls_sha512_finish_ret(&ctx, output)) != 0) { + if ((ret = mbedtls_sha512_finish(&ctx, output)) != 0) { goto exit; } @@ -460,22 +926,12 @@ exit: return ret; } -#if !defined(MBEDTLS_DEPRECATED_REMOVED) -void mbedtls_sha512(const unsigned char *input, - size_t ilen, - unsigned char output[64], - int is384) -{ - mbedtls_sha512_ret(input, ilen, output, is384); -} -#endif - #if defined(MBEDTLS_SELF_TEST) /* * FIPS-180-2 test vectors */ -static const unsigned char sha512_test_buf[3][113] = +static const unsigned char sha_test_buf[3][113] = { { "abc" }, { @@ -484,17 +940,19 @@ static const unsigned char sha512_test_buf[3][113] = { "" } }; -static const size_t sha512_test_buflen[3] = +static const size_t sha_test_buflen[3] = { 3, 112, 1000 }; -static const unsigned char sha512_test_sum[][64] = +typedef const unsigned char (sha_test_sum_t)[64]; + +/* + * SHA-384 test vectors + */ +#if defined(MBEDTLS_SHA384_C) +static sha_test_sum_t sha384_test_sum[] = { -#if !defined(MBEDTLS_SHA512_NO_SHA384) - /* - * SHA-384 test vectors - */ { 0xCB, 0x00, 0x75, 0x3F, 0x45, 0xA3, 0x5E, 0x8B, 0xB5, 0xA0, 0x3D, 0x69, 0x9A, 0xC6, 0x50, 0x07, 0x27, 0x2C, 0x32, 0xAB, 0x0E, 0xDE, 0xD1, 0x63, @@ -512,12 +970,16 @@ static const unsigned char sha512_test_sum[][64] = 0xED, 0x14, 0x9E, 0x9C, 0x00, 0xF2, 0x48, 0x52, 0x79, 0x72, 0xCE, 0xC5, 0x70, 0x4C, 0x2A, 0x5B, 0x07, 0xB8, 0xB3, 0xDC, 0x38, 0xEC, 0xC4, 0xEB, - 0xAE, 0x97, 0xDD, 0xD8, 0x7F, 0x3D, 0x89, 0x85 }, -#endif /* !MBEDTLS_SHA512_NO_SHA384 */ + 0xAE, 0x97, 0xDD, 0xD8, 0x7F, 0x3D, 0x89, 0x85 } +}; +#endif /* MBEDTLS_SHA384_C */ - /* - * SHA-512 test vectors - */ +/* + * SHA-512 test vectors + */ +#if defined(MBEDTLS_SHA512_C) +static sha_test_sum_t sha512_test_sum[] = +{ { 0xDD, 0xAF, 0x35, 0xA1, 0x93, 0x61, 0x7A, 0xBA, 0xCC, 0x41, 0x73, 0x49, 0xAE, 0x20, 0x41, 0x31, 0x12, 0xE6, 0xFA, 0x4E, 0x89, 0xA9, 0x7E, 0xA2, @@ -543,19 +1005,23 @@ static const unsigned char sha512_test_sum[][64] = 0xEB, 0x00, 0x9C, 0x5C, 0x2C, 0x49, 0xAA, 0x2E, 0x4E, 0xAD, 0xB2, 0x17, 0xAD, 0x8C, 0xC0, 0x9B } }; +#endif /* MBEDTLS_SHA512_C */ -#define ARRAY_LENGTH(a) (sizeof(a) / sizeof((a)[0])) - -/* - * Checkup routine - */ -int mbedtls_sha512_self_test(int verbose) +static int mbedtls_sha512_common_self_test(int verbose, int is384) { - int i, j, k, buflen, ret = 0; + int i, buflen, ret = 0; unsigned char *buf; unsigned char sha512sum[64]; mbedtls_sha512_context ctx; +#if defined(MBEDTLS_SHA384_C) && defined(MBEDTLS_SHA512_C) + sha_test_sum_t *sha_test_sum = (is384) ? sha384_test_sum : sha512_test_sum; +#elif defined(MBEDTLS_SHA512_C) + sha_test_sum_t *sha_test_sum = sha512_test_sum; +#else + sha_test_sum_t *sha_test_sum = sha384_test_sum; +#endif + buf = mbedtls_calloc(1024, sizeof(unsigned char)); if (NULL == buf) { if (verbose != 0) { @@ -567,44 +1033,37 @@ int mbedtls_sha512_self_test(int verbose) mbedtls_sha512_init(&ctx); - for (i = 0; i < (int) ARRAY_LENGTH(sha512_test_sum); i++) { - j = i % 3; -#if !defined(MBEDTLS_SHA512_NO_SHA384) - k = i < 3; -#else - k = 0; -#endif - + for (i = 0; i < 3; i++) { if (verbose != 0) { - mbedtls_printf(" SHA-%d test #%d: ", 512 - k * 128, j + 1); + mbedtls_printf(" SHA-%d test #%d: ", 512 - is384 * 128, i + 1); } - if ((ret = mbedtls_sha512_starts_ret(&ctx, k)) != 0) { + if ((ret = mbedtls_sha512_starts(&ctx, is384)) != 0) { goto fail; } - if (j == 2) { + if (i == 2) { memset(buf, 'a', buflen = 1000); - for (j = 0; j < 1000; j++) { - ret = mbedtls_sha512_update_ret(&ctx, buf, buflen); + for (int j = 0; j < 1000; j++) { + ret = mbedtls_sha512_update(&ctx, buf, buflen); if (ret != 0) { goto fail; } } } else { - ret = mbedtls_sha512_update_ret(&ctx, sha512_test_buf[j], - sha512_test_buflen[j]); + ret = mbedtls_sha512_update(&ctx, sha_test_buf[i], + sha_test_buflen[i]); if (ret != 0) { goto fail; } } - if ((ret = mbedtls_sha512_finish_ret(&ctx, sha512sum)) != 0) { + if ((ret = mbedtls_sha512_finish(&ctx, sha512sum)) != 0) { goto fail; } - if (memcmp(sha512sum, sha512_test_sum[i], 64 - k * 16) != 0) { + if (memcmp(sha512sum, sha_test_sum[i], 64 - is384 * 16) != 0) { ret = 1; goto fail; } @@ -632,8 +1091,22 @@ exit: return ret; } +#if defined(MBEDTLS_SHA512_C) +int mbedtls_sha512_self_test(int verbose) +{ + return mbedtls_sha512_common_self_test(verbose, 0); +} +#endif /* MBEDTLS_SHA512_C */ + +#if defined(MBEDTLS_SHA384_C) +int mbedtls_sha384_self_test(int verbose) +{ + return mbedtls_sha512_common_self_test(verbose, 1); +} +#endif /* MBEDTLS_SHA384_C */ + #undef ARRAY_LENGTH #endif /* MBEDTLS_SELF_TEST */ -#endif /* MBEDTLS_SHA512_C */ +#endif /* MBEDTLS_SHA512_C || MBEDTLS_SHA384_C */ |