Skip to content

Commit d606814

Browse files
committed
fix(simd.h): gather_mask was wrong for no-simd fallback (AcademySoftwareFoundation#4183)
The operation it's supposed to be performing is to merge in the gathered value where the mask is set, and leave mask=0 lanes unchanged. But for our "no SIMD" fallback (i.e., no AVX2 for 4- and 8-wide), we incorrectly implemented it as setting the unmasked lanes to 0! This was quite wrong, and unfortunately masked by the fact that our test of the function initialized a variable to 0. Change to a better number. The best number. Signed-off-by: Larry Gritz <[email protected]>
1 parent 8e43838 commit d606814

File tree

2 files changed

+7
-7
lines changed

2 files changed

+7
-7
lines changed

src/include/OpenImageIO/simd.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4426,7 +4426,7 @@ vint4::gather_mask (const vbool_t& mask, const value_t *baseptr, const vint_t& v
44264426
#if OIIO_SIMD_AVX >= 2
44274427
m_simd = _mm_mask_i32gather_epi32 (m_simd, baseptr, vindex, _mm_cvtps_epi32(mask), scale);
44284428
#else
4429-
SIMD_CONSTRUCT (mask[i] ? *(const value_t *)((const char *)baseptr + vindex[i]*scale) : 0);
4429+
SIMD_DO (if (mask[i]) m_val[i] = *(const value_t *)((const char *)baseptr + vindex[i]*scale));
44304430
#endif
44314431
}
44324432

@@ -5309,7 +5309,7 @@ vint8::gather_mask (const vbool_t& mask, const value_t *baseptr, const vint_t& v
53095309
#if OIIO_SIMD_AVX >= 2
53105310
m_simd = _mm256_mask_i32gather_epi32 (m_simd, baseptr, vindex, _mm256_cvtps_epi32(mask), scale);
53115311
#else
5312-
SIMD_CONSTRUCT (mask[i] ? *(const value_t *)((const char *)baseptr + vindex[i]*scale) : 0);
5312+
SIMD_DO (if (mask[i]) m_val[i] = *(const value_t *)((const char *)baseptr + vindex[i]*scale));
53135313
#endif
53145314
}
53155315

@@ -7073,7 +7073,7 @@ vfloat4::gather_mask (const vbool_t& mask, const value_t *baseptr, const vint_t&
70737073
#if OIIO_SIMD_AVX >= 2
70747074
m_simd = _mm_mask_i32gather_ps (m_simd, baseptr, vindex, mask, scale);
70757075
#else
7076-
SIMD_CONSTRUCT (mask[i] ? *(const value_t *)((const char *)baseptr + vindex[i]*scale) : 0);
7076+
SIMD_DO (if (mask[i]) m_val[i] = *(const value_t *)((const char *)baseptr + vindex[i]*scale));
70777077
#endif
70787078
}
70797079

@@ -8981,7 +8981,7 @@ vfloat8::gather_mask (const vbool_t& mask, const value_t *baseptr, const vint_t&
89818981
#if OIIO_SIMD_AVX >= 2
89828982
m_simd = _mm256_mask_i32gather_ps (m_simd, baseptr, vindex, mask, scale);
89838983
#else
8984-
SIMD_CONSTRUCT (mask[i] ? *(const value_t *)((const char *)baseptr + vindex[i]*scale) : 0);
8984+
SIMD_DO (if (mask[i]) m_val[i] = *(const value_t *)((const char *)baseptr + vindex[i]*scale));
89858985
#endif
89868986
}
89878987

src/libutil/simd_test.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -607,9 +607,9 @@ test_gatherscatter()
607607
OIIO_CHECK_SIMD_EQUAL(g, VEC::Iota());
608608

609609
BOOL mask = BOOL::from_bitmask(0x55555555); // every other one
610-
ELEM every_other_iota[] = { 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0 };
611-
gm = 0;
610+
gm = 42;
612611
gm.gather_mask (mask, gather_source.data(), indices);
612+
ELEM every_other_iota[] = { 0, 42, 2, 42, 4, 42, 6, 42, 8, 42, 10, 42, 12, 42, 14, 42 };
613613
OIIO_CHECK_SIMD_EQUAL (gm, VEC(every_other_iota));
614614

615615
std::vector<ELEM> scatter_out (bufsize, (ELEM)-1);
@@ -622,7 +622,7 @@ test_gatherscatter()
622622
OIIO_CHECK_EQUAL (scatter_out[i], ((i%3) == 1 && (i&1) ? i/3 : -1));
623623

624624
benchmark ("gather", [&](const ELEM *d){ VEC v; v.gather (d, indices); return v; }, gather_source.data());
625-
benchmark ("gather_mask", [&](const ELEM *d){ VEC v; v.gather_mask (mask, d, indices); return v; }, gather_source.data());
625+
benchmark ("gather_mask", [&](const ELEM *d){ VEC v = ELEM(0); v.gather_mask (mask, d, indices); return v; }, gather_source.data());
626626
benchmark ("scatter", [&](ELEM *d){ g.scatter (d, indices); return g; }, scatter_out.data());
627627
benchmark ("scatter_mask", [&](ELEM *d){ g.scatter_mask (mask, d, indices); return g; }, scatter_out.data());
628628
}

0 commit comments

Comments
 (0)