Skip to content

Commit c0a1a5d

Browse files
NindalethBlack_Fox
authored and committed
gguf-dump : support i-quants (ggml-org#5841)
Co-authored-by: Black_Fox <[email protected]>
1 parent abb8e00 commit c0a1a5d

File tree

1 file changed

+44
-28
lines changed

1 file changed

+44
-28
lines changed

gguf-py/gguf/constants.py

Lines changed: 44 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -604,20 +604,28 @@ class PoolingType(IntEnum):
604604

605605

606606
class GGMLQuantizationType(IntEnum):
    """Tensor storage/quantization formats recognized in GGUF files.

    The numeric values mirror ggml's quantization-type enum, so they must
    never be renumbered.  Note the gap at 4 and 5 — presumably removed
    legacy formats; confirm against ggml's enum before reusing those values.
    """
    # Unquantized floating-point formats.
    F32 = 0
    F16 = 1
    # Classic block quantizations.
    Q4_0 = 2
    Q4_1 = 3
    Q5_0 = 6
    Q5_1 = 7
    Q8_0 = 8
    Q8_1 = 9
    # k-quants (256-element super-block formats).
    Q2_K = 10
    Q3_K = 11
    Q4_K = 12
    Q5_K = 13
    Q6_K = 14
    Q8_K = 15
    # i-quants, added by this change so gguf-dump can report them.
    IQ2_XXS = 16
    IQ2_XS = 17
    IQ3_XXS = 18
    IQ1_S = 19
    IQ4_NL = 20
    IQ3_S = 21
    IQ2_S = 22
    IQ4_XS = 23
621629

622630

623631
class GGUFEndian(IntEnum):
@@ -662,20 +670,28 @@ def get_type(val: Any) -> GGUFValueType:
662670
# Elements per super-block for the k-quant / i-quant formats below.
QK_K = 256
# Items here are (block size, type size)
#   block size: number of elements packed into one quantization block
#   type size:  bytes needed to store one such block (scales + packed data)
# NOTE(review): the byte counts appear to decompose as scale/min fields plus
# packed value bits (e.g. Q4_0: 2-byte scale + 16 bytes of 4-bit values) —
# confirm against ggml's block struct definitions.
GGML_QUANT_SIZES = {
    GGMLQuantizationType.F32: (1, 4),
    GGMLQuantizationType.F16: (1, 2),
    GGMLQuantizationType.Q4_0: (32, 2 + 16),
    GGMLQuantizationType.Q4_1: (32, 2 + 2 + 16),
    GGMLQuantizationType.Q5_0: (32, 2 + 4 + 16),
    GGMLQuantizationType.Q5_1: (32, 2 + 2 + 4 + 16),
    GGMLQuantizationType.Q8_0: (32, 2 + 32),
    GGMLQuantizationType.Q8_1: (32, 4 + 4 + 32),
    # k-quants: all use QK_K-element super-blocks.
    GGMLQuantizationType.Q2_K: (256, 2 + 2 + QK_K // 16 + QK_K // 4),
    GGMLQuantizationType.Q3_K: (256, 2 + QK_K // 4 + QK_K // 8 + 12),
    GGMLQuantizationType.Q4_K: (256, 2 + 2 + QK_K // 2 + 12),
    GGMLQuantizationType.Q5_K: (256, 2 + 2 + QK_K // 2 + QK_K // 8 + 12),
    GGMLQuantizationType.Q6_K: (256, 2 + QK_K // 2 + QK_K // 4 + QK_K // 16),
    GGMLQuantizationType.Q8_K: (256, 4 + QK_K + QK_K // 8),
    # i-quants, added by this change (IQ4_NL is the only 32-element one).
    GGMLQuantizationType.IQ2_XXS: (256, 2 + QK_K // 4),
    GGMLQuantizationType.IQ2_XS: (256, 2 + QK_K // 4 + QK_K // 32),
    GGMLQuantizationType.IQ3_XXS: (256, 2 + QK_K // 4 + QK_K // 8),
    GGMLQuantizationType.IQ1_S: (256, 2 + QK_K // 8 + QK_K // 16),
    GGMLQuantizationType.IQ4_NL: (32, 2 + 16),
    GGMLQuantizationType.IQ3_S: (256, 2 + QK_K // 4 + QK_K // 8 + QK_K // 32 + 4),
    GGMLQuantizationType.IQ2_S: (256, 2 + QK_K // 4 + QK_K // 16),
    GGMLQuantizationType.IQ4_XS: (256, 2 + 2 + QK_K // 2 + QK_K // 64),
}
680696

681697

0 commit comments

Comments
 (0)