Skip to content

Commit d9c3ba2

Browse files
authored
ggml : disable iq4_nl interleave size 8 (#10709)
ggml-ci
1 parent ce4a7b8 commit d9c3ba2

File tree

1 file changed

+18
-14
lines changed

1 file changed

+18
-14
lines changed

ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp

Lines changed: 18 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -3748,16 +3748,18 @@ static block_iq4_nlx4 make_block_iq4_nlx4(block_iq4_nl * in, unsigned int blck_s
37483748

37493749
const int end = QK4_NL * 2 / blck_size_interleave;
37503750

3751-
if (blck_size_interleave == 8) {
3752-
for (int i = 0; i < end; ++i) {
3753-
int src_id = i % 4;
3754-
int src_offset = (i / 4) * blck_size_interleave;
3755-
int dst_offset = i * blck_size_interleave;
3756-
3757-
// Using memcpy to avoid unaligned memory accesses
3758-
memcpy(&out.qs[dst_offset], &in[src_id].qs[src_offset], sizeof(uint64_t));
3759-
}
3760-
} else if (blck_size_interleave == 4) {
3751+
// TODO: this branch seems wrong
3752+
//if (blck_size_interleave == 8) {
3753+
// for (int i = 0; i < end; ++i) {
3754+
// int src_id = i % 4;
3755+
// int src_offset = (i / 4) * blck_size_interleave;
3756+
// int dst_offset = i * blck_size_interleave;
3757+
3758+
// // Using memcpy to avoid unaligned memory accesses
3759+
// memcpy(&out.qs[dst_offset], &in[src_id].qs[src_offset], sizeof(uint64_t));
3760+
// }
3761+
//} else
3762+
if (blck_size_interleave == 4) {
37613763
for (int i = 0; i < end; ++i) {
37623764
int src_id = i % 4;
37633765
int src_offset = (i / 4) * blck_size_interleave;
@@ -3774,7 +3776,8 @@ static block_iq4_nlx4 make_block_iq4_nlx4(block_iq4_nl * in, unsigned int blck_s
37743776

37753777
static int repack_iq4_nl_to_iq4_nl_4_bl(struct ggml_tensor * t, int interleave_block, const void * GGML_RESTRICT data, size_t data_size) {
37763778
GGML_ASSERT(t->type == GGML_TYPE_IQ4_NL);
3777-
GGML_ASSERT(interleave_block == 4 || interleave_block == 8);
3779+
//GGML_ASSERT(interleave_block == 4 || interleave_block == 8);
3780+
GGML_ASSERT(interleave_block == 4);
37783781

37793782
block_iq4_nlx4 * dst = (block_iq4_nlx4 *)t->data;
37803783
const block_iq4_nl * src = (const block_iq4_nl *)data;
@@ -3825,9 +3828,10 @@ template <> int repack<block_iq4_nl, 4, 4>(struct ggml_tensor * t, const void *
38253828
return repack_iq4_nl_to_iq4_nl_4_bl(t, 4, data, data_size);
38263829
}
38273830

3828-
template <> int repack<block_iq4_nl, 8, 4>(struct ggml_tensor * t, const void * data, size_t data_size) {
3829-
return repack_iq4_nl_to_iq4_nl_4_bl(t, 8, data, data_size);
3830-
}
3831+
// TODO: needs to be revisited
3832+
//template <> int repack<block_iq4_nl, 8, 4>(struct ggml_tensor * t, const void * data, size_t data_size) {
3833+
// return repack_iq4_nl_to_iq4_nl_4_bl(t, 8, data, data_size);
3834+
//}
38313835

38323836
// gemv
38333837
template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS>

0 commit comments

Comments
 (0)