@@ -43,6 +43,8 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
43
43
gguf .GGMLQuantizationType .F32 ,
44
44
gguf .GGMLQuantizationType .F16 ,
45
45
gguf .GGMLQuantizationType .Q8_0 ,
46
+ gguf .GGMLQuantizationType .Q4_K ,
47
+ gguf .GGMLQuantizationType .Q6_K ,
46
48
):
47
49
raise ValueError (f"Cannot handle type { tensor .tensor_type .name } for tensor { repr (tensor .name )} " )
48
50
logger .info (f"* Preparing to convert from { file_endian .upper ()} to { order .upper ()} " )
@@ -96,6 +98,59 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
96
98
if block_num % 100000 == 0 :
97
99
inner_pbar .set_description (f"Byte-swapping Blocks [{ (n_blocks - block_num ) // n_blocks } ]" )
98
100
101
+ elif tensor .tensor_type == gguf .GGMLQuantizationType .Q4_K :
102
+ # Handle Q4_K tensor blocks (block_q4_k)
103
+ # Specific handling of block_q4_k is required.
104
+ # Each block_q4_k consists of 2 f16 values followed by 140 int8 values.
105
+
106
+ # first flatten structure
107
+ newshape = 1
108
+ for i in tensor .data .shape :
109
+ newshape *= i
110
+
111
+ tensor .data .resize (newshape )
112
+
113
+ block_size = 144
114
+ n_blocks = len (tensor .data ) // block_size
115
+ for block_num in (inner_pbar := tqdm (range (n_blocks ), desc = "Byte-swapping Blocks" , leave = False )):
116
+ block_offs = block_num * block_size
117
+
118
+ # Byte-Swap f16 sized fields
119
+ delta = tensor .data [block_offs :block_offs + 2 ].view (dtype = np .uint16 )
120
+ delta .byteswap (inplace = True )
121
+
122
+ delta = tensor .data [block_offs + 2 :block_offs + 4 ].view (dtype = np .uint16 )
123
+ delta .byteswap (inplace = True )
124
+
125
+ # The remaining int8 values need no byte-swap; just refresh the progress label periodically
126
+ if block_num % 100000 == 0 :
127
+ inner_pbar .set_description (f"Byte-swapping Blocks [{ (n_blocks - block_num ) // n_blocks } ]" )
128
+
129
+ elif tensor .tensor_type == gguf .GGMLQuantizationType .Q6_K :
130
+ # Handle Q6_K tensor blocks (block_q6_k)
131
+ # Specific handling of block_q6_k is required.
132
+ # Each block_q6_k consists of 208 int8 values followed by 1 f16 value.
133
+
134
+ # first flatten structure
135
+ newshape = 1
136
+ for i in tensor .data .shape :
137
+ newshape *= i
138
+
139
+ tensor .data .resize (newshape )
140
+
141
+ block_size = 210
142
+ n_blocks = len (tensor .data ) // block_size
143
+ for block_num in (inner_pbar := tqdm (range (n_blocks ), desc = "Byte-swapping Blocks" , leave = False )):
144
+ block_offs = block_num * block_size
145
+
146
+ # Byte-Swap f16 sized field
147
+ delta = tensor .data [block_offs + 208 :block_offs + 210 ].view (dtype = np .uint16 )
148
+ delta .byteswap (inplace = True )
149
+
150
+ # The 208 int8 values need no byte-swap; just refresh the progress label periodically
151
+ if block_num % 100000 == 0 :
152
+ inner_pbar .set_description (f"Byte-swapping Blocks [{ (n_blocks - block_num ) // n_blocks } ]" )
153
+
99
154
else :
100
155
# Handle other tensor types
101
156
tensor .data .byteswap (inplace = True )
0 commit comments