File tree Expand file tree Collapse file tree 1 file changed +18
-0
lines changed
aten/src/ATen/cpu/vec/vec256 Expand file tree Collapse file tree 1 file changed +18
-0
lines changed Original file line number Diff line number Diff line change @@ -223,6 +223,24 @@ struct VecConvert<
223
223
};
224
224
#endif
225
225
226
#if defined(CPU_CAPABILITY_NEON)
// NEON specialization converting one BFloat16 vector into one float vector.
//
// The conversion is done purely with integer lane operations: each 16-bit
// source lane is zero-extended to 32 bits, shifted left by 16 so the stored
// bits land in the upper half of the word, and the resulting 32-bit pattern
// is reinterpreted as a float.
template <>
struct VecConvert<float, 1, BFloat16, 1> {
  // Converts src[0] and returns the result in element 0 of the output.
  //
  // Only the first eight 16-bit lanes starting at the address of src[0] are
  // read (a single 128-bit load); they produce two float32x4_t halves.
  // NOTE(review): assumes the 16-bit payload of src[0] is laid out
  // contiguously at the start of the object - confirm against the
  // Vectorized<BFloat16> definition used for this build.
  static inline VectorizedN<float, 1> apply(
      const VectorizedN<BFloat16, 1>& src) {
    const uint16x8_t raw_bits =
        vld1q_u16(reinterpret_cast<const uint16_t*>(&src[0]));

    // vshlq_u32 takes its per-lane shift counts as a signed vector.
    const int32x4_t shift_by_16 = vdupq_n_s32(16);

    // Zero-extend the low and high four lanes to 32 bits each.
    const uint32x4_t widened_lo = vmovl_u16(vget_low_u16(raw_bits));
    const uint32x4_t widened_hi = vmovl_u16(vget_high_u16(raw_bits));

    VectorizedN<float, 1> converted;
    converted[0] = {
        vreinterpretq_f32_u32(vshlq_u32(widened_lo, shift_by_16)),
        vreinterpretq_f32_u32(vshlq_u32(widened_hi, shift_by_16))};
    return converted;
  }
};
#endif
243
+
226
244
template <typename src_t >
227
245
struct VecConvert <
228
246
float ,
You can’t perform that action at this time.
0 commit comments