diff options
Diffstat (limited to 'thirdparty/astcenc/astcenc_vecmathlib_avx2_8.h')
-rw-r--r-- | thirdparty/astcenc/astcenc_vecmathlib_avx2_8.h | 22 |
1 file changed, 15 insertions, 7 deletions
diff --git a/thirdparty/astcenc/astcenc_vecmathlib_avx2_8.h b/thirdparty/astcenc/astcenc_vecmathlib_avx2_8.h index a785aca75b..3ca25e35e1 100644 --- a/thirdparty/astcenc/astcenc_vecmathlib_avx2_8.h +++ b/thirdparty/astcenc/astcenc_vecmathlib_avx2_8.h @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2019-2022 Arm Limited +// Copyright 2019-2024 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -242,6 +242,14 @@ struct vint8 } /** + * @brief Factory that returns a vector loaded from unaligned memory. + */ + static ASTCENC_SIMD_INLINE vint8 load(const uint8_t* p) + { + return vint8(_mm256_lddqu_si256(reinterpret_cast<const __m256i*>(p))); + } + + /** * @brief Factory that returns a vector loaded from 32B aligned memory. */ static ASTCENC_SIMD_INLINE vint8 loada(const int* p) @@ -1000,7 +1008,7 @@ ASTCENC_SIMD_INLINE vint8 float_to_int(vfloat8 a) */ ASTCENC_SIMD_INLINE vint8 float_to_int_rtn(vfloat8 a) { - a = round(a); + a = a + vfloat8(0.5f); return vint8(_mm256_cvttps_epi32(a.m)); } @@ -1152,9 +1160,9 @@ ASTCENC_SIMD_INLINE vint8 interleave_rgba8(vint8 r, vint8 g, vint8 b, vint8 a) * * All masked lanes must be at the end of vector, after all non-masked lanes. 
*/ -ASTCENC_SIMD_INLINE void store_lanes_masked(int* base, vint8 data, vmask8 mask) +ASTCENC_SIMD_INLINE void store_lanes_masked(uint8_t* base, vint8 data, vmask8 mask) { - _mm256_maskstore_epi32(base, _mm256_castps_si256(mask.m), data.m); + _mm256_maskstore_epi32(reinterpret_cast<int*>(base), _mm256_castps_si256(mask.m), data.m); } /** @@ -1162,7 +1170,7 @@ ASTCENC_SIMD_INLINE void store_lanes_masked(int* base, vint8 data, vmask8 mask) */ ASTCENC_SIMD_INLINE void print(vint8 a) { - alignas(ASTCENC_VECALIGN) int v[8]; + alignas(32) int v[8]; storea(a, v); printf("v8_i32:\n %8d %8d %8d %8d %8d %8d %8d %8d\n", v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]); @@ -1173,7 +1181,7 @@ ASTCENC_SIMD_INLINE void print(vint8 a) */ ASTCENC_SIMD_INLINE void printx(vint8 a) { - alignas(ASTCENC_VECALIGN) int v[8]; + alignas(32) int v[8]; storea(a, v); printf("v8_i32:\n %08x %08x %08x %08x %08x %08x %08x %08x\n", v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]); @@ -1184,7 +1192,7 @@ ASTCENC_SIMD_INLINE void printx(vint8 a) */ ASTCENC_SIMD_INLINE void print(vfloat8 a) { - alignas(ASTCENC_VECALIGN) float v[8]; + alignas(32) float v[8]; storea(a, v); printf("v8_f32:\n %0.4f %0.4f %0.4f %0.4f %0.4f %0.4f %0.4f %0.4f\n", static_cast<double>(v[0]), static_cast<double>(v[1]), |