Skip to content

Commit b9ebbcd

Browse files
committed
fix(simd.h): Address NEON issues (AcademySoftwareFoundation#4143)
Primarily, the recent changes to vint4::store for the NEON case (PR AcademySoftwareFoundation#4071) appear to contain some type mismatches. Apple Clang on ARM-based Macs (including our CI) accepts them, but they generate type errors on ARM Linux platforms. The types of the temporary variables in that function were inconsistent, so I tightened them up to use the correct types. That is the primary fix here.

Secondarily, I modified simd.h and the CMake setup so that the build option USE_SIMD=0 disables NEON in the same way that it disables SSE. (I realized that USE_SIMD=0 was not disabling NEON, so there was no way for a NEON platform to disable SIMD completely if it needed to.)

Fixes AcademySoftwareFoundation#4111

Signed-off-by: Larry Gritz <[email protected]>
1 parent b31a01b commit b9ebbcd

File tree

2 files changed

+11
-4
lines changed

2 files changed

+11
-4
lines changed

src/cmake/compiler.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ set (SIMD_COMPILE_FLAGS "")
300300
if (NOT USE_SIMD STREQUAL "")
301301
message (STATUS "Compiling with SIMD level ${USE_SIMD}")
302302
if (USE_SIMD STREQUAL "0")
303-
set (SIMD_COMPILE_FLAGS ${SIMD_COMPILE_FLAGS} "-DOIIO_NO_SSE=1")
303+
set (SIMD_COMPILE_FLAGS ${SIMD_COMPILE_FLAGS} "-DOIIO_NO_SIMD=1")
304304
else ()
305305
string (REPLACE "," ";" SIMD_FEATURE_LIST ${USE_SIMD})
306306
foreach (feature ${SIMD_FEATURE_LIST})

src/include/OpenImageIO/simd.h

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,13 @@
8181
// OIIO_SIMD_HAS_SIMD8 : nonzero if vfloat8, vint8, vbool8 are defined
8282
// OIIO_SIMD_HAS_SIMD16 : nonzero if vfloat16, vint16, vbool16 are defined
8383

84+
#ifdef OIIO_NO_SIMD /* Request to disable all SIMD */
85+
# define OIIO_NO_SSE 1
86+
# define OIIO_NO_AVX 1
87+
# define OIIO_NO_AVX2 1
88+
# define OIIO_NO_NEON 1
89+
#endif
90+
8491
#if defined(_M_ARM64) || defined(__aarch64__) || defined(__aarch64)
8592
# ifndef __ARM_NEON__
8693
# define __ARM_NEON__
@@ -4803,9 +4810,9 @@ OIIO_FORCEINLINE void vint4::store (unsigned char *values) const {
48034810
_mm_store_ss((float*)values, _mm_castsi128_ps(val8));
48044811
#elif OIIO_SIMD_NEON
48054812
vint4 clamped = m_simd & vint4(0xff);
4806-
simd_t val16 = vcombine_s16(vqmovn_s32(clamped), vdup_n_s16(0));
4807-
simd_t val8 = vcombine_u8(vqmovun_s16(val16), vdup_n_u8(0));
4808-
vst1q_lane_u32((uint32_t*)values, val8, 0);
4813+
int16x8_t val16 = vcombine_s16(vqmovn_s32(clamped), vdup_n_s16(0));
4814+
uint8x16_t val8 = vcombine_u8(vqmovun_s16(val16), vdup_n_u8(0));
4815+
vst1q_lane_u32((uint32_t*)values, vreinterpretq_u32_u8(val8), 0);
48094816
#else
48104817
SIMD_DO (values[i] = m_val[i]);
48114818
#endif

0 commit comments

Comments
 (0)