Git commit
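Not built from source: I downloaded the precompiled binaries from the releases page.
The release used is master-585-44cca3d, and the binaries report commit 44cca3d in their logs.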
Operating System & Version
Windows 11 25H2
GGML backends
Vulkan, HIP
Command-line arguments used
sd-cli.exe --cfg-scale 4.0 --sampling-method euler --color -v --chroma-disable-dit-mask --diffusion-model ..\chroma-unlocked-45\chroma-unlocked-v40-Q8_0.gguf --vae ..\chroma-unlocked-45\ae.safetensors --t5xxl ..\chroma-unlocked-45\t5xxl_fp16.safetensors -p "a lovely cat holding a sign says 'chroma.cpp'"
Steps to reproduce
1. Downloaded the model files passed on the command line above (chroma-unlocked-v40-Q8_0.gguf, ae.safetensors, t5xxl_fp16.safetensors).
2. Downloaded the ROCm and Vulkan releases from here:
   https://github.com/leejet/stable-diffusion.cpp/releases/tag/master-585-44cca3d
3. Tried to generate some images with Chroma using both versions (ROCm and Vulkan); both crashed with these errors:
   Vulkan: D:\a\stable-diffusion.cpp\stable-diffusion.cpp\ggml\src\ggml-backend.cpp:290: GGML_ASSERT(buf != NULL && "tensor buffer not set") failed
   ROCm: D:/a/stable-diffusion.cpp/stable-diffusion.cpp/ggml/src/ggml-backend.cpp:290: GGML_ASSERT(buf != NULL && "tensor buffer not set") failed
What you expected to happen
The images should be generated without the program crashing.
What actually happened
With both backends, sd-cli.exe crashed with the following assertion failure:
Vulkan:
D:\a\stable-diffusion.cpp\stable-diffusion.cpp\ggml\src\ggml-backend.cpp:290: GGML_ASSERT(buf != NULL && "tensor buffer not set") failed
ROCm:
D:/a/stable-diffusion.cpp/stable-diffusion.cpp/ggml/src/ggml-backend.cpp:290: GGML_ASSERT(buf != NULL && "tensor buffer not set") failed
Logs / error messages / stack trace
ROCm version logs:
[DEBUG] main.cpp:547 - version: stable-diffusion.cpp version unknown, commit 44cca3d
[DEBUG] main.cpp:548 - System Info:
SSE3 = 1 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | VSX = 0 |
[DEBUG] main.cpp:549 - SDCliParams {
mode: img_gen,
output_path: "output.png",
image_path: "",
metadata_format: "text",
verbose: true,
color: true,
canny_preprocess: false,
convert_name: false,
preview_method: none,
preview_interval: 1,
preview_path: "preview.png",
preview_fps: 16,
taesd_preview: false,
preview_noisy: false,
metadata_raw: false,
metadata_brief: false,
metadata_all: false
}
[DEBUG] main.cpp:550 - SDContextParams {
n_threads: 16,
model_path: "",
clip_l_path: "",
clip_g_path: "",
clip_vision_path: "",
t5xxl_path: "..\chroma-unlocked-45\t5xxl_fp16.safetensors",
llm_path: "",
llm_vision_path: "",
diffusion_model_path: "..\chroma-unlocked-45\chroma-unlocked-v40-Q8_0.gguf",
high_noise_diffusion_model_path: "",
vae_path: "..\chroma-unlocked-45\ae.safetensors",
taesd_path: "",
esrgan_path: "",
control_net_path: "",
embedding_dir: "",
embeddings: {
}
wtype: NONE,
tensor_type_rules: "",
lora_model_dir: ".",
photo_maker_path: "",
rng_type: cuda,
sampler_rng_type: NONE,
offload_params_to_cpu: false,
enable_mmap: false,
control_net_cpu: false,
clip_on_cpu: false,
vae_on_cpu: false,
flash_attn: false,
diffusion_flash_attn: false,
diffusion_conv_direct: false,
vae_conv_direct: false,
circular: false,
circular_x: false,
circular_y: false,
chroma_use_dit_mask: false,
qwen_image_zero_cond_t: false,
chroma_use_t5_mask: false,
chroma_t5_mask_pad: 1,
prediction: NONE,
lora_apply_mode: auto,
force_sdxl_vae_conv_scale: false
}
[DEBUG] main.cpp:551 - SDGenerationParams {
loras: "{
}",
high_noise_loras: "{
}",
prompt: "a lovely cat holding a sign says 'chroma.cpp'",
negative_prompt: "",
clip_skip: -1,
width: -1,
height: -1,
batch_count: 1,
init_image_path: "",
end_image_path: "",
mask_image_path: "",
control_image_path: "",
ref_image_paths: [],
control_video_path: "",
auto_resize_ref_image: true,
increase_ref_index: false,
pm_id_images_dir: "",
pm_id_embed_path: "",
pm_style_strength: 20,
skip_layers: [7, 8, 9],
sample_params: (txt_cfg: 4.00, img_cfg: 4.00, distilled_guidance: 3.50, slg.layer_count: 0, slg.layer_start: 0.01, slg.layer_end: 0.20, slg.scale: 0.00, scheduler: NONE, sample_method: euler, sample_steps: 20, eta: inf, shifted_timestep: 0, flow_shift: inf),
high_noise_skip_layers: [7, 8, 9],
high_noise_sample_params: (txt_cfg: 7.00, img_cfg: 7.00, distilled_guidance: 3.50, slg.layer_count: 0, slg.layer_start: 0.01, slg.layer_end: 0.20, slg.scale: 0.00, scheduler: NONE, sample_method: NONE, sample_steps: 20, eta: inf, shifted_timestep: 0, flow_shift: inf),
custom_sigmas: [],
cache_mode: "",
cache_option: "",
cache: disabled (threshold=inf, start=0.15, end=0.95),
moe_boundary: 0.875,
video_frames: 1,
fps: 16,
vace_strength: 1,
strength: 0.75,
control_strength: 0.9,
seed: 42,
upscale_repeats: 1,
upscale_tile_size: 128,
vae_tiling_params: { 0, 0, 0, 0.5, 0, 0 },
}
[DEBUG] stable-diffusion.cpp:175 - Using CUDA backend
[INFO ] src\ggml_extend.hpp:81 - ggml_cuda_init: found 1 ROCm devices (Total VRAM: 110456 MiB):
[INFO ] src\ggml_extend.hpp:81 - Device 0: AMD Radeon(TM) 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 110456 MiB
[INFO ] stable-diffusion.cpp:269 - loading diffusion model from '..\chroma-unlocked-45\chroma-unlocked-v40-Q8_0.gguf'
[INFO ] model.cpp:229 - load ..\chroma-unlocked-45\chroma-unlocked-v40-Q8_0.gguf using gguf format
[DEBUG] model.cpp:278 - init from '..\chroma-unlocked-45\chroma-unlocked-v40-Q8_0.gguf'
[INFO ] stable-diffusion.cpp:309 - loading t5xxl from '..\chroma-unlocked-45\t5xxl_fp16.safetensors'
[INFO ] model.cpp:232 - load ..\chroma-unlocked-45\t5xxl_fp16.safetensors using safetensors format
[DEBUG] model.cpp:307 - init from '..\chroma-unlocked-45\t5xxl_fp16.safetensors', prefix = 'text_encoders.t5xxl.transformer.'
[INFO ] stable-diffusion.cpp:330 - loading vae from '..\chroma-unlocked-45\ae.safetensors'
[INFO ] model.cpp:232 - load ..\chroma-unlocked-45\ae.safetensors using safetensors format
[DEBUG] model.cpp:307 - init from '..\chroma-unlocked-45\ae.safetensors', prefix = 'vae.'
[INFO ] stable-diffusion.cpp:355 - Version: Flux
[INFO ] stable-diffusion.cpp:383 - Weight type stat: f32: 644 | f16: 219 | q8_0: 228 | bf16: 15
[INFO ] stable-diffusion.cpp:384 - Conditioner weight type stat: f16: 219
[INFO ] stable-diffusion.cpp:385 - Diffusion model weight type stat: f32: 400 | q8_0: 228 | bf16: 15
[INFO ] stable-diffusion.cpp:386 - VAE weight type stat: f32: 244
[DEBUG] stable-diffusion.cpp:388 - ggml tensor size = 400 bytes
[INFO ] src\flux.hpp:1290 - flux: depth = 19, depth_single_blocks = 38, guidance_embed = false, context_in_dim = 4096, hidden_size = 3072, num_heads = 24
[INFO ] src\flux.hpp:1292 - Using pruned modulation (Chroma)
[DEBUG] src\ggml_extend.hpp:2050 - t5 params backend buffer size = 9083.77 MB(VRAM) (219 tensors)
[DEBUG] src\ggml_extend.hpp:2050 - flux params backend buffer size = 9284.37 MB(VRAM) (643 tensors)
[INFO ] stable-diffusion.cpp:681 - using VAE for encoding / decoding
[INFO ] src\auto_encoder_kl.hpp:517 - vae decoder: ch = 128
[DEBUG] src\ggml_extend.hpp:2050 - vae params backend buffer size = 94.57 MB(VRAM) (138 tensors)
[DEBUG] stable-diffusion.cpp:805 - loading weights
[DEBUG] model.cpp:755 - using 16 threads for model loading
[DEBUG] model.cpp:777 - loading tensors from ..\chroma-unlocked-45\chroma-unlocked-v40-Q8_0.gguf
|=============================> | 643/1106 - 5.23GB/s
[DEBUG] model.cpp:777 - loading tensors from ..\chroma-unlocked-45\t5xxl_fp16.safetensors
|======================================> | 862/1106 - 5.27GB/s
[DEBUG] model.cpp:777 - loading tensors from ..\chroma-unlocked-45\ae.safetensors
|==================================================| 1106/1106 - 5.03GB/s
[INFO ] model.cpp:1012 - loading tensors completed, taking 3.61s (process: 0.00s, read: 2.91s, memcpy: 0.00s, convert: 0.00s, copy_to_backend: 0.17s)
[DEBUG] stable-diffusion.cpp:845 - finished loaded file
[INFO ] stable-diffusion.cpp:912 - total params memory size = 18462.71MB (VRAM 18462.71MB, RAM 0.00MB): text_encoders 9083.77MB(VRAM), diffusion_model 9284.37MB(VRAM), vae 94.57MB(VRAM), controlnet 0.00MB(VRAM), pmid 0.00MB(VRAM)
[INFO ] stable-diffusion.cpp:986 - running in Flux FLOW mode
[INFO ] stable-diffusion.cpp:3160 - generate_image 512x512
[INFO ] src\denoiser.hpp:499 - get_sigmas with discrete scheduler
[INFO ] stable-diffusion.cpp:2736 - sampling using Euler method
[DEBUG] src\conditioner.hpp:1376 - parse 'a lovely cat holding a sign says 'chroma.cpp'' to [['a lovely cat holding a sign says 'chroma.cpp'', 1], ]
[DEBUG] t5_unigram_tokenizer.cpp:336 - split prompt "a lovely cat holding a sign says 'chroma.cpp'" to tokens ["?", "a", "?lovely", "?cat", "?holding", "?", "a", "?sign", "?says", "?", "'", "chro", "m", "a", ".", "c", "pp", "'", ]
[DEBUG] src\ggml_extend.hpp:1862 - t5 compute buffer size: 233.00 MB(VRAM)
[DEBUG] src\conditioner.hpp:1468 - computing condition graph completed, taking 1304 ms
[DEBUG] src\conditioner.hpp:1376 - parse '' to [['', 1], ]
[DEBUG] t5_unigram_tokenizer.cpp:336 - split prompt "" to tokens ["?", ]
[DEBUG] src\ggml_extend.hpp:1862 - t5 compute buffer size: 233.00 MB(VRAM)
[DEBUG] src\conditioner.hpp:1468 - computing condition graph completed, taking 1216 ms
[INFO ] stable-diffusion.cpp:3090 - get_learned_condition completed, taking 2.52s
[INFO ] stable-diffusion.cpp:3194 - generating image: 1/1 - seed 42
[DEBUG] src\ggml_extend.hpp:1862 - flux compute buffer size: 470.50 MB(VRAM)
D:/a/stable-diffusion.cpp/stable-diffusion.cpp/ggml/src/ggml-backend.cpp:290: GGML_ASSERT(buf != NULL && "tensor buffer not set") failed
Vulkan logs:
[DEBUG] main.cpp:547 - version: stable-diffusion.cpp version unknown, commit 44cca3d
[DEBUG] main.cpp:548 - System Info:
SSE3 = 1 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | VSX = 0 |
[DEBUG] main.cpp:549 - SDCliParams {
mode: img_gen,
output_path: "output.png",
image_path: "",
metadata_format: "text",
verbose: true,
color: true,
canny_preprocess: false,
convert_name: false,
preview_method: none,
preview_interval: 1,
preview_path: "preview.png",
preview_fps: 16,
taesd_preview: false,
preview_noisy: false,
metadata_raw: false,
metadata_brief: false,
metadata_all: false
}
[DEBUG] main.cpp:550 - SDContextParams {
n_threads: 16,
model_path: "",
clip_l_path: "",
clip_g_path: "",
clip_vision_path: "",
t5xxl_path: "..\chroma-unlocked-45\t5xxl_fp16.safetensors",
llm_path: "",
llm_vision_path: "",
diffusion_model_path: "..\chroma-unlocked-45\chroma-unlocked-v40-Q8_0.gguf",
high_noise_diffusion_model_path: "",
vae_path: "..\chroma-unlocked-45\ae.safetensors",
taesd_path: "",
esrgan_path: "",
control_net_path: "",
embedding_dir: "",
embeddings: {
}
wtype: NONE,
tensor_type_rules: "",
lora_model_dir: ".",
photo_maker_path: "",
rng_type: cuda,
sampler_rng_type: NONE,
offload_params_to_cpu: false,
enable_mmap: false,
control_net_cpu: false,
clip_on_cpu: false,
vae_on_cpu: false,
flash_attn: false,
diffusion_flash_attn: false,
diffusion_conv_direct: false,
vae_conv_direct: false,
circular: false,
circular_x: false,
circular_y: false,
chroma_use_dit_mask: false,
qwen_image_zero_cond_t: false,
chroma_use_t5_mask: false,
chroma_t5_mask_pad: 1,
prediction: NONE,
lora_apply_mode: auto,
force_sdxl_vae_conv_scale: false
}
[DEBUG] main.cpp:551 - SDGenerationParams {
loras: "{
}",
high_noise_loras: "{
}",
prompt: "a lovely cat holding a sign says 'chroma.cpp'",
negative_prompt: "",
clip_skip: -1,
width: -1,
height: -1,
batch_count: 1,
init_image_path: "",
end_image_path: "",
mask_image_path: "",
control_image_path: "",
ref_image_paths: [],
control_video_path: "",
auto_resize_ref_image: true,
increase_ref_index: false,
pm_id_images_dir: "",
pm_id_embed_path: "",
pm_style_strength: 20,
skip_layers: [7, 8, 9],
sample_params: (txt_cfg: 4.00, img_cfg: 4.00, distilled_guidance: 3.50, slg.layer_count: 0, slg.layer_start: 0.01, slg.layer_end: 0.20, slg.scale: 0.00, scheduler: NONE, sample_method: euler, sample_steps: 20, eta: inf, shifted_timestep: 0, flow_shift: inf),
high_noise_skip_layers: [7, 8, 9],
high_noise_sample_params: (txt_cfg: 7.00, img_cfg: 7.00, distilled_guidance: 3.50, slg.layer_count: 0, slg.layer_start: 0.01, slg.layer_end: 0.20, slg.scale: 0.00, scheduler: NONE, sample_method: NONE, sample_steps: 20, eta: inf, shifted_timestep: 0, flow_shift: inf),
custom_sigmas: [],
cache_mode: "",
cache_option: "",
cache: disabled (threshold=inf, start=0.15, end=0.95),
moe_boundary: 0.875,
video_frames: 1,
fps: 16,
vace_strength: 1,
strength: 0.75,
control_strength: 0.9,
seed: 42,
upscale_repeats: 1,
upscale_tile_size: 128,
vae_tiling_params: { 0, 0, 0, 0.5, 0, 0 },
}
[DEBUG] stable-diffusion.cpp:183 - Using Vulkan backend
[DEBUG] ggml_extend.hpp:78 - ggml_vulkan: Found 1 Vulkan devices:
[DEBUG] ggml_extend.hpp:78 - ggml_vulkan: 0 = AMD Radeon(TM) 8060S Graphics (AMD proprietary driver) | uma: 1 | fp16: 1 | bf16: 1 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
[INFO ] stable-diffusion.cpp:204 - Vulkan: Using device 0
[INFO ] stable-diffusion.cpp:269 - loading diffusion model from '..\chroma-unlocked-45\chroma-unlocked-v40-Q8_0.gguf'
[INFO ] model.cpp:229 - load ..\chroma-unlocked-45\chroma-unlocked-v40-Q8_0.gguf using gguf format
[DEBUG] model.cpp:278 - init from '..\chroma-unlocked-45\chroma-unlocked-v40-Q8_0.gguf'
[INFO ] stable-diffusion.cpp:309 - loading t5xxl from '..\chroma-unlocked-45\t5xxl_fp16.safetensors'
[INFO ] model.cpp:232 - load ..\chroma-unlocked-45\t5xxl_fp16.safetensors using safetensors format
[DEBUG] model.cpp:307 - init from '..\chroma-unlocked-45\t5xxl_fp16.safetensors', prefix = 'text_encoders.t5xxl.transformer.'
[INFO ] stable-diffusion.cpp:330 - loading vae from '..\chroma-unlocked-45\ae.safetensors'
[INFO ] model.cpp:232 - load ..\chroma-unlocked-45\ae.safetensors using safetensors format
[DEBUG] model.cpp:307 - init from '..\chroma-unlocked-45\ae.safetensors', prefix = 'vae.'
[INFO ] stable-diffusion.cpp:355 - Version: Flux
[INFO ] stable-diffusion.cpp:383 - Weight type stat: f32: 644 | f16: 219 | q8_0: 228 | bf16: 15
[INFO ] stable-diffusion.cpp:384 - Conditioner weight type stat: f16: 219
[INFO ] stable-diffusion.cpp:385 - Diffusion model weight type stat: f32: 400 | q8_0: 228 | bf16: 15
[INFO ] stable-diffusion.cpp:386 - VAE weight type stat: f32: 244
[DEBUG] stable-diffusion.cpp:388 - ggml tensor size = 400 bytes
[INFO ] flux.hpp:1290 - flux: depth = 19, depth_single_blocks = 38, guidance_embed = false, context_in_dim = 4096, hidden_size = 3072, num_heads = 24
[INFO ] flux.hpp:1292 - Using pruned modulation (Chroma)
[DEBUG] ggml_extend.hpp:2050 - t5 params backend buffer size = 9083.77 MB(VRAM) (219 tensors)
[DEBUG] ggml_extend.hpp:2050 - flux params backend buffer size = 9284.37 MB(VRAM) (643 tensors)
[INFO ] stable-diffusion.cpp:681 - using VAE for encoding / decoding
[INFO ] auto_encoder_kl.hpp:517 - vae decoder: ch = 128
[DEBUG] ggml_extend.hpp:2050 - vae params backend buffer size = 94.57 MB(VRAM) (138 tensors)
[DEBUG] stable-diffusion.cpp:805 - loading weights
[DEBUG] model.cpp:755 - using 16 threads for model loading
[DEBUG] model.cpp:777 - loading tensors from ..\chroma-unlocked-45\chroma-unlocked-v40-Q8_0.gguf
|=============================> | 643/1106 - 6.26GB/s
[DEBUG] model.cpp:777 - loading tensors from ..\chroma-unlocked-45\t5xxl_fp16.safetensors
|======================================> | 862/1106 - 6.20GB/s
[DEBUG] model.cpp:777 - loading tensors from ..\chroma-unlocked-45\ae.safetensors
|==================================================| 1106/1106 - 5.84GB/s
[INFO ] model.cpp:1012 - loading tensors completed, taking 3.10s (process: 0.00s, read: 2.33s, memcpy: 0.00s, convert: 0.01s, copy_to_backend: 0.25s)
[DEBUG] stable-diffusion.cpp:845 - finished loaded file
[INFO ] stable-diffusion.cpp:912 - total params memory size = 18462.71MB (VRAM 18462.71MB, RAM 0.00MB): text_encoders 9083.77MB(VRAM), diffusion_model 9284.37MB(VRAM), vae 94.57MB(VRAM), controlnet 0.00MB(VRAM), pmid 0.00MB(VRAM)
[INFO ] stable-diffusion.cpp:986 - running in Flux FLOW mode
[INFO ] stable-diffusion.cpp:3160 - generate_image 512x512
[INFO ] denoiser.hpp:499 - get_sigmas with discrete scheduler
[INFO ] stable-diffusion.cpp:2736 - sampling using Euler method
[DEBUG] conditioner.hpp:1376 - parse 'a lovely cat holding a sign says 'chroma.cpp'' to [['a lovely cat holding a sign says 'chroma.cpp'', 1], ]
[DEBUG] t5_unigram_tokenizer.cpp:336 - split prompt "a lovely cat holding a sign says 'chroma.cpp'" to tokens ["?", "a", "?lovely", "?cat", "?holding", "?", "a", "?sign", "?says", "?", "'", "chro", "m", "a", ".", "c", "pp", "'", ]
[DEBUG] ggml_extend.hpp:1862 - t5 compute buffer size: 233.00 MB(VRAM)
[DEBUG] conditioner.hpp:1468 - computing condition graph completed, taking 1068 ms
[DEBUG] conditioner.hpp:1376 - parse '' to [['', 1], ]
[DEBUG] t5_unigram_tokenizer.cpp:336 - split prompt "" to tokens ["?", ]
[DEBUG] ggml_extend.hpp:1862 - t5 compute buffer size: 233.00 MB(VRAM)
[DEBUG] conditioner.hpp:1468 - computing condition graph completed, taking 1077 ms
[INFO ] stable-diffusion.cpp:3090 - get_learned_condition completed, taking 2.15s
[INFO ] stable-diffusion.cpp:3194 - generating image: 1/1 - seed 42
[DEBUG] ggml_extend.hpp:1862 - flux compute buffer size: 470.50 MB(VRAM)
D:\a\stable-diffusion.cpp\stable-diffusion.cpp\ggml\src\ggml-backend.cpp:290: GGML_ASSERT(buf != NULL && "tensor buffer not set") failed
Additional context / environment details
Minisforum MS-S1 Max
CPU: AMD RYZEN AI MAX+ 395
RAM: 128 GB (96 GB allocated as VRAM, 32 GB allocated as RAM)
GPU: AMD Radeon(TM) 8060S
ROCm: 6.4
Git commit
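Not built from source: I downloaded the precompiled binaries from the releases page.
The release used is master-585-44cca3d, and the binaries report commit 44cca3d in their logs.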
Operating System & Version
Windows 11 25H2
GGML backends
Vulkan, HIP
Command-line arguments used
sd-cli.exe --cfg-scale 4.0 --sampling-method euler --color -v --chroma-disable-dit-mask --diffusion-model ..\chroma-unlocked-45\chroma-unlocked-v40-Q8_0.gguf --vae ..\chroma-unlocked-45\ae.safetensors --t5xxl ..\chroma-unlocked-45\t5xxl_fp16.safetensors -p "a lovely cat holding a sign says 'chroma.cpp'"
Steps to reproduce
Downloaded the ROCm and Vulkan releases from here:
https://github.com/leejet/stable-diffusion.cpp/releases/tag/master-585-44cca3d
Tried to generate some images with Chroma using both versions (ROCm and Vulkan); both crashed with these errors:
Vulkan: D:\a\stable-diffusion.cpp\stable-diffusion.cpp\ggml\src\ggml-backend.cpp:290: GGML_ASSERT(buf != NULL && "tensor buffer not set") failed
ROCm: D:/a/stable-diffusion.cpp/stable-diffusion.cpp/ggml/src/ggml-backend.cpp:290: GGML_ASSERT(buf != NULL && "tensor buffer not set") failed
What you expected to happen
The images should be generated without the program crashing.
What actually happened
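With both backends, sd-cli.exe crashed with the following assertion failure:
Vulkan:
D:\a\stable-diffusion.cpp\stable-diffusion.cpp\ggml\src\ggml-backend.cpp:290: GGML_ASSERT(buf != NULL && "tensor buffer not set") failed
ROCm:
D:/a/stable-diffusion.cpp/stable-diffusion.cpp/ggml/src/ggml-backend.cpp:290: GGML_ASSERT(buf != NULL && "tensor buffer not set") failed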
Logs / error messages / stack trace
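ROCm version logs: see the full ROCm log earlier in this report; it ends with the GGML_ASSERT failure quoted above.
Vulkan logs: see the full Vulkan log earlier in this report; it ends with the GGML_ASSERT failure quoted above.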
Additional context / environment details
Minisforum MS-S1 Max
CPU: AMD RYZEN AI MAX+ 395
RAM: 128 GB (96 GB allocated as VRAM, 32 GB allocated as RAM)
GPU: AMD Radeon(TM) 8060S
ROCm: 6.4