Commit 778fbf0

AlekseiNikiforovIBM authored and mglambda committed
gguf_convert_endian.py: implement byteswapping for q4_k and q6_k (ggml-org#11349)
1 parent 94a0343 commit 778fbf0

1 file changed
gguf-py/gguf/scripts/gguf_convert_endian.py

Lines changed: 55 additions & 0 deletions
@@ -43,6 +43,8 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
             gguf.GGMLQuantizationType.F32,
             gguf.GGMLQuantizationType.F16,
             gguf.GGMLQuantizationType.Q8_0,
+            gguf.GGMLQuantizationType.Q4_K,
+            gguf.GGMLQuantizationType.Q6_K,
         ):
             raise ValueError(f"Cannot handle type {tensor.tensor_type.name} for tensor {repr(tensor.name)}")
     logger.info(f"* Preparing to convert from {file_endian.upper()} to {order.upper()}")
@@ -96,6 +98,59 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
                 if block_num % 100000 == 0:
                     inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
 
+        elif tensor.tensor_type == gguf.GGMLQuantizationType.Q4_K:
+            # Handle Q4_K tensor blocks (block_q4_k)
+            # Specific handling of block_q4_k is required.
+            # Each block_q4_k consists of 2 f16 values followed by 140 int8 values.
+
+            # first flatten structure
+            newshape = 1
+            for i in tensor.data.shape:
+                newshape *= i
+
+            tensor.data.resize(newshape)
+
+            block_size = 144
+            n_blocks = len(tensor.data) // block_size
+            for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
+                block_offs = block_num * block_size
+
+                # Byte-swap the two f16-sized fields (d and dmin)
+                delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
+                delta.byteswap(inplace=True)
+
+                delta = tensor.data[block_offs + 2:block_offs + 4].view(dtype=np.uint16)
+                delta.byteswap(inplace=True)
+
+                # Update the progress bar description periodically
+                if block_num % 100000 == 0:
+                    inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
+
+        elif tensor.tensor_type == gguf.GGMLQuantizationType.Q6_K:
+            # Handle Q6_K tensor blocks (block_q6_k)
+            # Specific handling of block_q6_k is required.
+            # Each block_q6_k consists of 208 int8 values followed by 1 f16 value.
+
+            # first flatten structure
+            newshape = 1
+            for i in tensor.data.shape:
+                newshape *= i
+
+            tensor.data.resize(newshape)
+
+            block_size = 210
+            n_blocks = len(tensor.data) // block_size
+            for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
+                block_offs = block_num * block_size
+
+                # Byte-swap the trailing f16-sized field (d)
+                delta = tensor.data[block_offs + 208:block_offs + 210].view(dtype=np.uint16)
+                delta.byteswap(inplace=True)
+
+                # Update the progress bar description periodically
+                if block_num % 100000 == 0:
+                    inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
+
         else:
             # Handle other tensor types
             tensor.data.byteswap(inplace=True)
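
For readers who want to see the Q4_K byte-swap in isolation: each 144-byte block is 2 f16 values (4 bytes) plus 140 single-byte values, so only the two leading f16 fields need swapping. The sketch below is not part of the commit; the function name byteswap_q4_k and the synthetic test data are illustrative only, and it assumes the tensor bytes have already been flattened to a 1-D uint8 array as in the diff.

import numpy as np

Q4_K_BLOCK_SIZE = 144  # 2 x f16 (4 bytes) + 140 single-byte values, per the diff


def byteswap_q4_k(raw: np.ndarray) -> None:
    """Swap the two leading f16 fields of every 144-byte Q4_K block, in place.

    `raw` is a flat uint8 view of the tensor data (hypothetical helper,
    mirroring the loop added in this commit).
    """
    n_blocks = len(raw) // Q4_K_BLOCK_SIZE
    for block_num in range(n_blocks):
        block_offs = block_num * Q4_K_BLOCK_SIZE
        # The first two 2-byte fields are the only multi-byte values in the block;
        # the remaining 140 bytes are endian-neutral and stay untouched.
        raw[block_offs:block_offs + 2].view(dtype=np.uint16).byteswap(inplace=True)
        raw[block_offs + 2:block_offs + 4].view(dtype=np.uint16).byteswap(inplace=True)


# Tiny self-check on synthetic data: swapping twice must restore the original bytes.
data = (np.arange(2 * Q4_K_BLOCK_SIZE) % 256).astype(np.uint8)
before = data.copy()
byteswap_q4_k(data)
byteswap_q4_k(data)
assert np.array_equal(data, before)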
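
A similar standalone sketch for Q6_K: each 210-byte block is 208 single-byte values followed by one f16 delta, so only the last two bytes of each block are swapped. Again, this is not part of the commit; byteswap_q6_k is a hypothetical name and the input is assumed to be the flattened uint8 tensor data.

import numpy as np

Q6_K_BLOCK_SIZE = 210  # 208 single-byte values + 1 f16 (2 bytes), per the diff


def byteswap_q6_k(raw: np.ndarray) -> None:
    """Swap the trailing f16 field of every 210-byte Q6_K block, in place."""
    n_blocks = len(raw) // Q6_K_BLOCK_SIZE
    for block_num in range(n_blocks):
        block_offs = block_num * Q6_K_BLOCK_SIZE
        # Only the f16 at offset 208 is multi-byte; bytes 0..207 need no swapping.
        raw[block_offs + 208:block_offs + 210].view(dtype=np.uint16).byteswap(inplace=True)


# Applying the swap twice is a no-op, which makes a quick round-trip check easy.
data = (np.arange(3 * Q6_K_BLOCK_SIZE) % 256).astype(np.uint8)
before = data.copy()
byteswap_q6_k(data)
byteswap_q6_k(data)
assert np.array_equal(data, before)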
