Skip to content

Commit 89b1c07

Browse files
authored
Merge pull request #7 from nicoboss/outtype-source
convert : Add --outtype source to convert_hf_to_gguf.py
2 parents 8bcd7d6 + 629d45b commit 89b1c07

File tree

3 files changed

+15
-2
lines changed

3 files changed

+15
-2
lines changed

convert_hf_to_gguf.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,16 @@ def prepare_tensors(self):
351351
data_qtype = gguf.GGMLQuantizationType.TQ1_0
352352
elif self.ftype == gguf.LlamaFileType.MOSTLY_TQ2_0:
353353
data_qtype = gguf.GGMLQuantizationType.TQ2_0
354+
elif self.ftype == gguf.LlamaFileType.MOSTLY_SOURCE:
355+
if old_dtype == torch.float16:
356+
data_qtype = gguf.GGMLQuantizationType.F16
357+
elif old_dtype == torch.bfloat16:
358+
data_qtype = gguf.GGMLQuantizationType.BF16
359+
elif old_dtype == torch.float32:
360+
data_qtype = gguf.GGMLQuantizationType.F32
361+
else:
362+
logger.warning(f"Cannot find destination type matching {old_dtype}: Using F16")
363+
data_qtype = gguf.GGMLQuantizationType.F16
354364
else:
355365
raise ValueError(f"Unknown file type: {self.ftype.name}")
356366

@@ -8164,8 +8174,8 @@ def parse_args() -> argparse.Namespace:
81648174
help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",
81658175
)
81668176
parser.add_argument(
8167-
"--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "tq1_0", "tq2_0", "auto"], default="f16",
8168-
help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, tq1_0 or tq2_0 for ternary, and auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type",
8177+
"--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "tq1_0", "tq2_0", "source", "auto"], default="f16",
8178+
help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, tq1_0 or tq2_0 for ternary, source to keep it unchanged, and auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type",
81698179
)
81708180
parser.add_argument(
81718181
"--bigendian", action="store_true",
@@ -8308,6 +8318,7 @@ def main() -> None:
83088318
"tq1_0": gguf.LlamaFileType.MOSTLY_TQ1_0,
83098319
"tq2_0": gguf.LlamaFileType.MOSTLY_TQ2_0,
83108320
"auto": gguf.LlamaFileType.GUESSED,
8321+
"source": gguf.LlamaFileType.MOSTLY_SOURCE,
83118322
}
83128323

83138324
is_split = args.split_max_tensors > 0 or args.split_max_size != "0"

gguf-py/gguf/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2760,6 +2760,7 @@ class LlamaFileType(IntEnum):
27602760
MOSTLY_TQ2_0 = 37 # except 1d tensors
27612761

27622762
GUESSED = 1024 # not specified in the model file
2763+
MOSTLY_SOURCE = 1025 # not specified in the model file
27632764

27642765

27652766
class GGUFEndian(IntEnum):

include/llama.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ extern "C" {
154154
LLAMA_FTYPE_MOSTLY_TQ2_0 = 37, // except 1d tensors
155155

156156
LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
157+
LLAMA_FTYPE_MOSTLY_SOURCE = 1025, // not specified in the model file
157158
};
158159

159160
enum llama_rope_scaling_type {

0 commit comments

Comments
 (0)