@@ -772,22 +772,37 @@ struct LLM_TN {
772
772
llm_arch arch;
773
773
774
774
// Resolve the name of `tensor` for the current architecture.
// Returns the sentinel "__missing__" when the architecture's name table has
// no entry, so callers never hit the exception from map::at.
// NOTE(review): assumes LLM_TENSOR_NAMES maps arch -> (tensor -> name) — confirm.
std::string operator()(llm_tensor tensor) const {
    // Single lookup: reuse the iterator instead of find() followed by at().
    const auto & names = LLM_TENSOR_NAMES[arch];
    const auto it = names.find(tensor);
    if (it == names.end()) {
        return "__missing__";
    }
    return it->second;
}
777
780
778
781
// Resolve the name of `tensor` and append ".<suffix>" (e.g. ".weight").
// Returns the sentinel "__missing__" (without the suffix) when the
// architecture's name table has no entry for this tensor.
std::string operator()(llm_tensor tensor, const std::string & suffix) const {
    // Single lookup: reuse the iterator instead of find() followed by at().
    const auto & names = LLM_TENSOR_NAMES[arch];
    const auto it = names.find(tensor);
    if (it == names.end()) {
        return "__missing__";
    }
    return it->second + "." + suffix;
}
781
787
782
788
// Resolve the name of `tensor`, substituting the layer/block id `bid` into
// the stored printf-style pattern via ::format.
// Returns the sentinel "__missing__" when the architecture's name table has
// no entry for this tensor.
std::string operator()(llm_tensor tensor, int bid) const {
    // Single lookup: reuse the iterator instead of find() followed by at().
    const auto & names = LLM_TENSOR_NAMES[arch];
    const auto it = names.find(tensor);
    if (it == names.end()) {
        return "__missing__";
    }
    return ::format(it->second.c_str(), bid);
}
785
794
786
795
// Resolve the name of `tensor` with layer/block id `bid` substituted into
// the stored printf-style pattern, then append ".<suffix>".
// Returns the sentinel "__missing__" (without the suffix) when the
// architecture's name table has no entry for this tensor.
std::string operator()(llm_tensor tensor, const std::string & suffix, int bid) const {
    // Single lookup: reuse the iterator instead of find() followed by at().
    const auto & names = LLM_TENSOR_NAMES[arch];
    const auto it = names.find(tensor);
    if (it == names.end()) {
        return "__missing__";
    }
    return ::format(it->second.c_str(), bid) + "." + suffix;
}
789
801
790
802
// Resolve the name of `tensor` with both ids (`bid`, `xid` — e.g. layer and
// expert index) substituted into the stored printf-style pattern, then
// append ".<suffix>".
// Returns the sentinel "__missing__" (without the suffix) when the
// architecture's name table has no entry for this tensor.
std::string operator()(llm_tensor tensor, const std::string & suffix, int bid, int xid) const {
    // Single lookup: reuse the iterator instead of find() followed by at().
    const auto & names = LLM_TENSOR_NAMES[arch];
    const auto it = names.find(tensor);
    if (it == names.end()) {
        return "__missing__";
    }
    return ::format(it->second.c_str(), bid, xid) + "." + suffix;
}
793
808
};
@@ -10227,6 +10242,7 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
10227
10242
}
10228
10243
++qs.i_ffn_up ;
10229
10244
}
10245
+
10230
10246
// if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K) new_type = GGML_TYPE_Q3_K;
10231
10247
// }
10232
10248
// IK: let's remove this, else Q2_K is almost the same as Q3_K_S
@@ -10286,19 +10302,19 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
10286
10302
10287
10303
// K-quants
10288
10304
case LLAMA_FTYPE_MOSTLY_Q2_K_S:
10289
- case LLAMA_FTYPE_MOSTLY_Q2_K: quantized_type = GGML_TYPE_Q2_K; break ;
10305
+ case LLAMA_FTYPE_MOSTLY_Q2_K: quantized_type = GGML_TYPE_Q2_K; break ;
10290
10306
case LLAMA_FTYPE_MOSTLY_Q3_K_XS:
10291
10307
case LLAMA_FTYPE_MOSTLY_Q3_K_S:
10292
10308
case LLAMA_FTYPE_MOSTLY_Q3_K_M:
10293
- case LLAMA_FTYPE_MOSTLY_Q3_K_L: quantized_type = GGML_TYPE_Q3_K; break ;
10309
+ case LLAMA_FTYPE_MOSTLY_Q3_K_L: quantized_type = GGML_TYPE_Q3_K; break ;
10294
10310
case LLAMA_FTYPE_MOSTLY_Q4_K_S:
10295
- case LLAMA_FTYPE_MOSTLY_Q4_K_M: quantized_type = GGML_TYPE_Q4_K; break ;
10311
+ case LLAMA_FTYPE_MOSTLY_Q4_K_M: quantized_type = GGML_TYPE_Q4_K; break ;
10296
10312
case LLAMA_FTYPE_MOSTLY_Q5_K_S:
10297
- case LLAMA_FTYPE_MOSTLY_Q5_K_M: quantized_type = GGML_TYPE_Q5_K; break ;
10298
- case LLAMA_FTYPE_MOSTLY_Q6_K: quantized_type = GGML_TYPE_Q6_K; break ;
10299
- case LLAMA_FTYPE_MOSTLY_IQ2_XXS:quantized_type = GGML_TYPE_IQ2_XXS; break ;
10300
- case LLAMA_FTYPE_MOSTLY_IQ2_XS : quantized_type = GGML_TYPE_IQ2_XS; break ;
10301
- case LLAMA_FTYPE_MOSTLY_IQ3_XXS:quantized_type = GGML_TYPE_IQ3_XXS; break ;
10313
+ case LLAMA_FTYPE_MOSTLY_Q5_K_M: quantized_type = GGML_TYPE_Q5_K; break ;
10314
+ case LLAMA_FTYPE_MOSTLY_Q6_K: quantized_type = GGML_TYPE_Q6_K; break ;
10315
+ case LLAMA_FTYPE_MOSTLY_IQ2_XXS: quantized_type = GGML_TYPE_IQ2_XXS; break ;
10316
+ case LLAMA_FTYPE_MOSTLY_IQ2_XS: quantized_type = GGML_TYPE_IQ2_XS; break ;
10317
+ case LLAMA_FTYPE_MOSTLY_IQ3_XXS: quantized_type = GGML_TYPE_IQ3_XXS; break ;
10302
10318
10303
10319
default : throw std::runtime_error (format (" invalid output file type %d\n " , ftype));
10304
10320
}
0 commit comments