@@ -3748,16 +3748,18 @@ static block_iq4_nlx4 make_block_iq4_nlx4(block_iq4_nl * in, unsigned int blck_s
3748
3748
3749
3749
const int end = QK4_NL * 2 / blck_size_interleave;
3750
3750
3751
- if (blck_size_interleave == 8 ) {
3752
- for (int i = 0 ; i < end; ++i) {
3753
- int src_id = i % 4 ;
3754
- int src_offset = (i / 4 ) * blck_size_interleave;
3755
- int dst_offset = i * blck_size_interleave;
3756
-
3757
- // Using memcpy to avoid unaligned memory accesses
3758
- memcpy (&out.qs [dst_offset], &in[src_id].qs [src_offset], sizeof (uint64_t ));
3759
- }
3760
- } else if (blck_size_interleave == 4 ) {
3751
+ // TODO: the blck_size_interleave == 8 branch below looks incorrect; it is commented out until it has been verified
3752
+ // if (blck_size_interleave == 8) {
3753
+ // for (int i = 0; i < end; ++i) {
3754
+ // int src_id = i % 4;
3755
+ // int src_offset = (i / 4) * blck_size_interleave;
3756
+ // int dst_offset = i * blck_size_interleave;
3757
+
3758
+ // // Using memcpy to avoid unaligned memory accesses
3759
+ // memcpy(&out.qs[dst_offset], &in[src_id].qs[src_offset], sizeof(uint64_t));
3760
+ // }
3761
+ // } else
3762
+ if (blck_size_interleave == 4 ) {
3761
3763
for (int i = 0 ; i < end; ++i) {
3762
3764
int src_id = i % 4 ;
3763
3765
int src_offset = (i / 4 ) * blck_size_interleave;
@@ -3774,7 +3776,8 @@ static block_iq4_nlx4 make_block_iq4_nlx4(block_iq4_nl * in, unsigned int blck_s
3774
3776
3775
3777
static int repack_iq4_nl_to_iq4_nl_4_bl (struct ggml_tensor * t, int interleave_block, const void * GGML_RESTRICT data, size_t data_size) {
3776
3778
GGML_ASSERT (t->type == GGML_TYPE_IQ4_NL);
3777
- GGML_ASSERT (interleave_block == 4 || interleave_block == 8 );
3779
+ // GGML_ASSERT(interleave_block == 4 || interleave_block == 8);
3780
+ GGML_ASSERT (interleave_block == 4 );
3778
3781
3779
3782
block_iq4_nlx4 * dst = (block_iq4_nlx4 *)t->data ;
3780
3783
const block_iq4_nl * src = (const block_iq4_nl *)data;
@@ -3825,9 +3828,10 @@ template <> int repack<block_iq4_nl, 4, 4>(struct ggml_tensor * t, const void *
3825
3828
return repack_iq4_nl_to_iq4_nl_4_bl (t, 4 , data, data_size);
3826
3829
}
3827
3830
3828
- template <> int repack<block_iq4_nl, 8 , 4 >(struct ggml_tensor * t, const void * data, size_t data_size) {
3829
- return repack_iq4_nl_to_iq4_nl_4_bl (t, 8 , data, data_size);
3830
- }
3831
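+ // TODO: revisit the 8-interleave specialization below; it is disabled because repack_iq4_nl_to_iq4_nl_4_bl currently asserts interleave_block == 4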
3832
+ // template <> int repack<block_iq4_nl, 8, 4>(struct ggml_tensor * t, const void * data, size_t data_size) {
3833
+ // return repack_iq4_nl_to_iq4_nl_4_bl(t, 8, data, data_size);
3834
+ // }
3831
3835
3832
3836
// gemv
3833
3837
template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS>
0 commit comments