Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions expose.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,13 +177,13 @@ struct sd_load_model_inputs
{
const char * model_filename = nullptr;
const char * executable_path = nullptr;
const int kcpp_main_gpu = -1;
const int kcpp_main_device = -1;
const int threads = 0;
const int quant = 0;
const bool flash_attention = false;
const bool offload_cpu = false;
const bool vae_cpu = false;
const bool clip_cpu = false;
const int kcpp_vae_device = -1;
const int kcpp_clip_device = -1;
const bool diffusion_conv_direct = false;
const bool vae_conv_direct = false;
const bool taesd = false;
Expand Down
58 changes: 40 additions & 18 deletions koboldcpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,13 +358,13 @@ class generation_outputs(ctypes.Structure):
class sd_load_model_inputs(ctypes.Structure):
_fields_ = [("model_filename", ctypes.c_char_p),
("executable_path", ctypes.c_char_p),
("kcpp_main_gpu", ctypes.c_int),
("kcpp_main_device", ctypes.c_int),
("threads", ctypes.c_int),
("quant", ctypes.c_int),
("flash_attention", ctypes.c_bool),
("offload_cpu", ctypes.c_bool),
("vae_cpu", ctypes.c_bool),
("clip_cpu", ctypes.c_bool),
("kcpp_vae_device", ctypes.c_int),
("kcpp_clip_device", ctypes.c_int),
("diffusion_conv_direct", ctypes.c_bool),
("vae_conv_direct", ctypes.c_bool),
("taesd", ctypes.c_bool),
Expand Down Expand Up @@ -2386,6 +2386,24 @@ def sd_quant_option(value):
except Exception:
return 0

sd_device_choices = ['CPU', 'main', '1', '2', '3', '4']

def sd_get_device_number(name, offset=0):
    """Map a device selection string to an internal device number.

    Returns -2 for CPU, -1 for the main GPU / default device, otherwise the
    numeric GPU index shifted by `offset` (the GUI comboboxes are 1-based, so
    export_vars passes offset=-1).

    Accepts the aliases "cpu", "gpu", "main" and "default" (case-insensitive).
    Unparseable or negative numeric input falls back to -1 (main device); the
    previous behaviour applied the offset to the parse-failure sentinel, which
    turned invalid text into -2 (CPU) when offset was -1.
    """
    if not name:
        return -1
    name = str(name).strip().lower()
    aliases = {"cpu": -2, "gpu": -1, "main": -1, "default": -1}
    if name in aliases:
        return aliases[name]
    try:
        parsed = int(name, 10)
    except (TypeError, ValueError):
        return -1  # invalid text -> main device, never accidentally CPU
    if parsed <= -2:
        return -2  # explicit negative below -1 means CPU
    if parsed == -1:
        return -1  # offset must not shift the "main device" sentinel
    return parsed + offset

def sd_get_device_name(value, offset=0):
    """Inverse of sd_get_device_number: map an internal device number back to
    the UI string used in sd_device_choices.

    -2 (or lower) -> "CPU", -1 -> "main", otherwise the GPU index shifted by
    `offset` (import_vars passes offset=1 because the GUI choices are 1-based).
    """
    if value <= -2:
        return "CPU"
    if value == -1:
        return "main"
    # Return a string: the result feeds a ctk.StringVar whose other choices
    # ("CPU", "main", "1"..."4") are all strings; returning an int made the
    # variable type-inconsistent.
    return str(value + offset)

def sd_load_model(model_filename,vae_filename,t5xxl_filename,clip1_filename,clip2_filename,photomaker_filename,upscaler_filename):
global args
inputs = sd_load_model_inputs()
Expand All @@ -2401,8 +2419,8 @@ def sd_load_model(model_filename,vae_filename,t5xxl_filename,clip1_filename,clip
inputs.quant = args.sdquant
inputs.flash_attention = args.sdflashattention
inputs.offload_cpu = args.sdoffloadcpu
inputs.vae_cpu = args.sdvaecpu
inputs.clip_cpu = False if args.sdclipgpu else True
inputs.kcpp_vae_device = args.sdvaedevice
inputs.kcpp_clip_device = args.sdclipdevice
sdconvdirect = sd_convdirect_option(args.sdconvdirect)
inputs.diffusion_conv_direct = sdconvdirect == 'full'
inputs.vae_conv_direct = sdconvdirect in ['vaeonly', 'full']
Expand Down Expand Up @@ -2430,7 +2448,7 @@ def sd_load_model(model_filename,vae_filename,t5xxl_filename,clip1_filename,clip
inputs.img_hard_limit = args.sdclamped
inputs.img_soft_limit = args.sdclampedsoft
inputs = set_backend_props(inputs)
inputs.kcpp_main_gpu = args.sdmaingpu
inputs.kcpp_main_device = args.sdmaingpu
ret = handle.sd_load_model(inputs)
return ret

Expand Down Expand Up @@ -7602,8 +7620,8 @@ def hide_tooltip(event):
sd_upscaler_var = ctk.StringVar()
sd_flash_attention_var = ctk.IntVar(value=0)
sd_offload_cpu_var = ctk.IntVar(value=0)
sd_vae_cpu_var = ctk.IntVar(value=0)
sd_clip_gpu_var = ctk.IntVar(value=0)
sd_vae_device_var = ctk.StringVar(value="main")
sd_clip_device_var = ctk.StringVar(value="CPU")
sd_runtime_loras_var = ctk.IntVar(value=0)
sd_vaeauto_var = ctk.IntVar(value=0)
sd_tiled_vae_var = ctk.StringVar(value=str(default_vae_tile_threshold))
Expand Down Expand Up @@ -8468,9 +8486,9 @@ def toggletaesd(a,b,c):
makelabelcombobox(images_tab, "Conv2D Direct:", sd_convdirect_var, row=42, labelpadx=(220), padx=(310), width=90, tooltiptxt="Use Conv2D Direct operation. May save memory or improve performance.\nMight crash if not supported by the backend.\n", values=sd_convdirect_choices)
makelabelentry(images_tab, "VAE Tiling Threshold:", sd_tiled_vae_var, 44, 50, padx=(144),singleline=True,tooltip="Enable VAE Tiling for images above this size, to save memory.\nSet to 0 to disable VAE tiling.")
makecheckbox(images_tab, "SD Flash Attention", sd_flash_attention_var, 44,padx=(230), tooltiptxt="Enable Flash Attention for image diffusion. May save memory or improve performance.")
makecheckbox(images_tab, "Model CPU Offload", sd_offload_cpu_var, 50,padx=8, tooltiptxt="Offload image weights in RAM to save VRAM, swap into VRAM when needed.")
makecheckbox(images_tab, "VAE on CPU", sd_vae_cpu_var, 50,padx=(160), tooltiptxt="Force VAE to CPU only for image generation.")
makecheckbox(images_tab, "CLIP on GPU", sd_clip_gpu_var, 50,padx=(280), tooltiptxt="Put CLIP and T5 to GPU for image generation. Otherwise, CLIP will use CPU.")
makecheckbox(images_tab, "Model Offload", sd_offload_cpu_var, 50,padx=8, tooltiptxt="Offload image weights in RAM to save VRAM, swap into VRAM when needed.")
makelabelcombobox(images_tab, "VAE dev:", sd_vae_device_var, 50,labelpadx=(140),padx=(200), width=70, tooltiptxt="Change VAE device for image generation.", values=sd_device_choices)
makelabelcombobox(images_tab, "CLIP dev:", sd_clip_device_var, 50,labelpadx=(280),padx=340, width=70, tooltiptxt="Change CLIP / T5 / LLM device for image generation.", values=sd_device_choices)

# audio tab
audio_tab = tabcontent["Audio"]
Expand Down Expand Up @@ -8783,8 +8801,8 @@ def export_vars():
args.sdmodel = sd_model_var.get() if sd_model_var.get() != "" else ""
args.sdflashattention = True if sd_flash_attention_var.get()==1 else False
args.sdoffloadcpu = True if sd_offload_cpu_var.get()==1 else False
args.sdvaecpu = True if sd_vae_cpu_var.get()==1 else False
args.sdclipgpu = True if sd_clip_gpu_var.get()==1 else False
args.sdvaedevice = sd_get_device_number(sd_vae_device_var.get(), -1)
args.sdclipdevice = sd_get_device_number(sd_clip_device_var.get(), -1)
args.sdthreads = (0 if sd_threads_var.get()=="" else int(sd_threads_var.get()))
args.sdclamped = (0 if int(sd_clamped_var.get())<=0 else int(sd_clamped_var.get()))
args.sdclampedsoft = (0 if int(sd_clamped_soft_var.get())<=0 else int(sd_clamped_soft_var.get()))
Expand Down Expand Up @@ -9072,8 +9090,8 @@ def import_vars(mydict):
sd_quant_var.set(sd_quant_choices[(mydict["sdquant"] if ("sdquant" in mydict and mydict["sdquant"]>=0 and mydict["sdquant"]<len(sd_quant_choices)) else 0)])
sd_flash_attention_var.set(1 if ("sdflashattention" in mydict and mydict["sdflashattention"]) else 0)
sd_offload_cpu_var.set(1 if ("sdoffloadcpu" in mydict and mydict["sdoffloadcpu"]) else 0)
sd_vae_cpu_var.set(1 if ("sdvaecpu" in mydict and mydict["sdvaecpu"]) else 0)
sd_clip_gpu_var.set(1 if ("sdclipgpu" in mydict and mydict["sdclipgpu"]) else 0)
sd_vae_device_var.set(sd_get_device_name(mydict.get("sdvaecpu", -1), 1))
sd_clip_device_var.set(sd_get_device_name(mydict.get("sdclipgpu", -2), 1))
sd_convdirect_var.set(sd_convdirect_option(mydict.get("sdconvdirect")))
sd_vae_var.set(mydict["sdvae"] if ("sdvae" in mydict and mydict["sdvae"]) else "")
sd_t5xxl_var.set(mydict["sdt5xxl"] if ("sdt5xxl" in mydict and mydict["sdt5xxl"]) else "")
Expand Down Expand Up @@ -9582,6 +9600,10 @@ def convert_invalid_args(args):
dict["sdlora"] = sanitize_lora_list(dict["sdlora"])
if "sdloramult" in dict:
dict["sdloramult"] = sanitize_lora_multipliers(dict["sdloramult"])
if "sdclipgpu" in dict and "sdclipdevice" not in dict:
dict["sdclipdevice"] = sd_get_device_number("main" if dict["sdclipgpu"] else "CPU")
if "sdvaecpu" in dict and "sdvaedevice" not in dict:
dict["sdvaedevice"] = sd_get_device_number("CPU" if dict["sdvaecpu"] else "main")
return args

def setuptunnel(global_memory, has_sd, has_music):
Expand Down Expand Up @@ -11590,8 +11612,8 @@ def range_checker(arg: str):
sdparsergroup.add_argument("--sdupscaler", metavar=('[filename]'), help="You can use ESRGAN as an upscaling model to resize images. Leave blank if unused.", default="")
sdparsergroup.add_argument("--sdflashattention", help="Enables Flash Attention for image generation.", action='store_true')
sdparsergroup.add_argument("--sdoffloadcpu", help="Offload image weights in RAM to save VRAM, swap into VRAM when needed.", action='store_true')
sdparsergroup.add_argument("--sdvaecpu", help="Force VAE to CPU only for image generation.", action='store_true')
sdparsergroup.add_argument("--sdclipgpu", help="Put CLIP and T5 to GPU for image generation. Otherwise, CLIP will use CPU.", action='store_true')
sdparsergroup.add_argument("--sdvaedevice", help="VAE device for image generation. GPU index, -1 or 'main' for the main GPU, or 'CPU'.", type=sd_get_device_number, default='main')
sdparsergroup.add_argument("--sdclipdevice", help="CLIP / T5 / LLM device for image generation. GPU index, -1 or 'main' for the main GPU, or 'CPU'.", type=sd_get_device_number, default='CPU')
sdparsergroup.add_argument("--sdconvdirect", help="Enables Conv2D Direct. May improve performance or reduce memory usage. Might crash if not supported by the backend. Can be 'off' (default) to disable, 'full' to turn it on for all operations, or 'vaeonly' to enable only for the VAE.", type=sd_convdirect_option, choices=sd_convdirect_choices, default=sd_convdirect_choices[0])
sdparsergroupvae = sdparsergroup.add_mutually_exclusive_group()
sdparsergroupvae.add_argument("--sdvae", metavar=('[filename]'), help="Specify an image generation safetensors VAE which replaces the one in the model.", default="")
Expand All @@ -11601,7 +11623,7 @@ def range_checker(arg: str):
sdparsergrouplora.add_argument("--sdlora", metavar=('[filename]'), help="Specify image generation LoRAs safetensors models to be applied. Multiple LoRAs are accepted.", nargs='+')
sdparsergroup.add_argument("--sdloramult", metavar=('[amounts]'), help="Multipliers for the image LoRA model to be applied.", type=float, nargs='+', default=[1.0])
sdparsergroup.add_argument("--sdtiledvae", metavar=('[maxres]'), help="Adjust the automatic VAE tiling trigger for images above this size. 0 disables vae tiling.", type=int, default=default_vae_tile_threshold)
sdparsergroup.add_argument("--sdmaingpu", metavar=('[Device ID]'), help="If specified, Image Generation weights will be placed on the selected GPU index", type=int, default=-1)
sdparsergroup.add_argument("--sdmaingpu", metavar=('[Device ID]'), help="If specified, Image Generation weights will be placed on the selected GPU index", type=sd_get_device_number, default='main')

whisperparsergroup = parser.add_argument_group('Whisper Transcription Commands')
whisperparsergroup.add_argument("--whispermodel", metavar=('[filename]'), help="Specify a Whisper .bin model to enable Speech-To-Text transcription.", default="")
Expand Down
2 changes: 1 addition & 1 deletion otherarch/sdcpp/kcpp_sd_extensions.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ namespace kcpp_sd {

void set_sd_log_level(int log);

void config_main_gpu(int value);
void config_device(int value, const std::string& mod = "");

}

Expand Down
10 changes: 6 additions & 4 deletions otherarch/sdcpp/sdtype_adapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,6 @@ struct SDParams {
float eta = -1.0f;
float strength = 0.75f;
int64_t seed = 42;
bool clip_on_cpu = false;
bool diffusion_flash_attn = false;
bool diffusion_conv_direct = false;
bool vae_conv_direct = false;
Expand Down Expand Up @@ -285,7 +284,10 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
printf("\nImageGen Init - Load Model: %s\n",inputs.model_filename);

//kcpp allow gpu id override
config_main_gpu(inputs.kcpp_main_gpu);
config_device(inputs.kcpp_main_device);
config_device(inputs.kcpp_clip_device, "clip");
config_device(inputs.kcpp_vae_device, "vae");
config_device(inputs.kcpp_main_device, "upscaler");

int lora_apply_mode = LORA_APPLY_AT_RUNTIME;
bool lora_dynamic = false;
Expand Down Expand Up @@ -424,8 +426,8 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
params.vae_conv_direct = sd_params->vae_conv_direct;
params.chroma_use_dit_mask = sd_params->chroma_use_dit_mask;
params.offload_params_to_cpu = inputs.offload_cpu;
params.keep_vae_on_cpu = inputs.vae_cpu;
params.keep_clip_on_cpu = inputs.clip_cpu;
//params.keep_vae_on_cpu = (inputs.kcpp_vae_device <= -2);
//params.keep_clip_on_cpu = (inputs.kcpp_clip_device <= -2);
params.lora_apply_mode = (lora_apply_mode_t)lora_apply_mode;

// also switches flash attn for the vae and conditioner
Expand Down
39 changes: 36 additions & 3 deletions otherarch/sdcpp/stable-diffusion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,24 @@ class StableDiffusionGGML {

StableDiffusionGGML() = default;

// Cache of initialized backends keyed by resolved device name; the shared_ptr
// deleter (ggml_backend_free) releases each backend when the object is destroyed.
std::map<std::string, std::shared_ptr<struct ggml_backend> > kcpp_backends;
// Return the backend for the device configured for `mod` ("" = main device,
// otherwise a module name such as "clip" or "vae"). Backends are created
// lazily and cached, so repeated requests for the same device share one
// backend instance.
// NOTE(review): if init_named_backend returns nullptr, the null result is
// cached and returned on every later call — confirm that is intended.
ggml_backend_t kcpp_get_backend(std::string mod = "", bool on_cpu = false) {
if (on_cpu) {
// Explicit CPU request overrides whatever device was configured for `mod`.
mod = "CPU";
}
std::string device_name = kcpp_sd_get_device_name(mod);
auto it = kcpp_backends.find(device_name);
if (it == kcpp_backends.end()) {
auto backend = std::shared_ptr<struct ggml_backend>(init_named_backend(device_name), ggml_backend_free);
kcpp_backends[device_name] = backend;
return backend.get();
} else {
return it->second.get();
}
}

~StableDiffusionGGML() {
#if 0 // kcpp
if (clip_backend != backend) {
ggml_backend_free(clip_backend);
}
Expand All @@ -174,6 +191,9 @@ class StableDiffusionGGML {
ggml_backend_free(vae_backend);
}
ggml_backend_free(backend);
# else // kcpp
// handled by the shared_ptr destructor
#endif // kcpp
}

std::string toLowerCase(const std::string& str) {
Expand All @@ -188,7 +208,7 @@ class StableDiffusionGGML {
}

void init_backend() {
backend = sd_get_default_backend();
backend = kcpp_get_backend();
}

std::shared_ptr<RNG> get_rng(rng_type_t rng_type) {
Expand Down Expand Up @@ -582,11 +602,17 @@ class StableDiffusionGGML {
: static_cast<size_t>(static_cast<double>(max_vram) * 1024.0 * 1024.0 * 1024.0);

{
clip_backend = backend;
#if 0 // kcpp
if (clip_on_cpu && !ggml_backend_is_cpu(backend)) {
LOG_INFO("CLIP: Using CPU backend");
clip_backend = ggml_backend_cpu_init();
}
#else // kcpp
clip_backend = kcpp_get_backend("clip", clip_on_cpu);
if (ggml_backend_is_cpu(clip_backend) && !ggml_backend_is_cpu(backend)) {
LOG_INFO("CLIP: Using CPU backend");
}
#endif
if (sd_version_is_sd3(version)) {
cond_stage_model = std::make_shared<SD3CLIPEmbedder>(clip_backend,
offload_params_to_cpu,
Expand Down Expand Up @@ -703,7 +729,7 @@ class StableDiffusionGGML {
cond_stage_model = std::make_shared<LLMEmbedder>(clip_backend,
offload_params_to_cpu,
tensor_storage_map,
version);
version);
diffusion_model = std::make_shared<ZImageModel>(backend,
offload_params_to_cpu,
tensor_storage_map,
Expand Down Expand Up @@ -765,12 +791,19 @@ class StableDiffusionGGML {
high_noise_diffusion_model->get_param_tensors(tensors);
}

#if 1 // kcpp
vae_backend = kcpp_get_backend("vae", sd_ctx_params->keep_vae_on_cpu);
if (ggml_backend_is_cpu(vae_backend) && !ggml_backend_is_cpu(backend)) {
LOG_INFO("VAE Autoencoder: Using CPU backend");
}
#else // kcpp
if (sd_ctx_params->keep_vae_on_cpu && !ggml_backend_is_cpu(backend)) {
LOG_INFO("VAE Autoencoder: Using CPU backend");
vae_backend = ggml_backend_cpu_init();
} else {
vae_backend = backend;
}
#endif // kcpp

auto create_tae = [&]() -> std::shared_ptr<VAE> {
if (sd_version_is_wan(version) ||
Expand Down
2 changes: 1 addition & 1 deletion otherarch/sdcpp/upscaler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ bool UpscalerGGML::load_from_file(const std::string& esrgan_path,
int n_threads) {
ggml_log_set(ggml_log_callback_default, nullptr);

backend = sd_get_default_backend();
backend = init_named_backend(kcpp_sd_get_device_name("upscaler"));

ModelLoader model_loader;
if (!model_loader.init_from_file_and_convert_name(esrgan_path)) {
Expand Down
46 changes: 29 additions & 17 deletions otherarch/sdcpp/util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -747,36 +747,48 @@ void kcpp_sd::set_sd_log_level(int log)
sdloglevel = log;
}

static int kcpp_main_gpu = -1;
void kcpp_sd::config_main_gpu(int value) {
// Resolved ggml device name for the main device ("" = library default).
static std::string kcpp_main_gpu = "";
// Per-module device overrides ("clip", "vae", "upscaler", ...) -> device name.
static std::map<std::string, std::string> kcpp_backend_config;
// Record the device selection for a module. `value` semantics: >= 0 is a ggml
// device index, -1 means main/default, <= -2 means CPU. An empty or "main"
// `mod` configures the main device; for other modules an unresolved selection
// clears the override so the module falls back to the main device.
void kcpp_sd::config_device(int value, const std::string& mod) {

// Ensure all backend plugins are registered before enumerating devices.
ggml_backend_load_all_once();
std::string dev_name = "";
if (value >= 0) {
size_t dev_count = ggml_backend_dev_count();
size_t dev_index = static_cast<size_t>(value);
if (dev_index >= dev_count) {
LOG_WARN("device %d not found, falling back to default", value);
value = -1; // NOTE(review): dead store — dev_name stays "" which already selects the default
} else {
dev_name = ggml_backend_dev_name(ggml_backend_dev_get(dev_index));
}
} else if (value <= -2) {
value = -2; // NOTE(review): dead store — dev_name carries the decision below
dev_name = "CPU";
}
if (mod == "" || mod == "main") {
kcpp_main_gpu = dev_name; // "" resets the main device to the library default
} else if (dev_name == "") {
kcpp_backend_config.erase(mod); // no explicit device: module falls back to the main device
} else {
kcpp_backend_config[mod] = dev_name;
}
}
std::string kcpp_sd_get_device_name(const std::string& mod) {
auto it = kcpp_backend_config.find(mod);
if (it != kcpp_backend_config.end()) {
return it->second;
} else if (kcpp_main_gpu == "") {
return get_default_backend_name();
} else {
return kcpp_main_gpu;
}
kcpp_main_gpu = value;
}
static ggml_backend_t kcpp_get_main_gpu() {
ggml_backend_t backend = nullptr;
if (kcpp_main_gpu != -1) {
std::string dev_name;
if (kcpp_main_gpu <= -2) {
dev_name = "CPU";
} else {
auto dev = ggml_backend_dev_get(static_cast<size_t>(kcpp_main_gpu));
dev_name = ggml_backend_dev_name(dev);
}
backend = init_named_backend(dev_name);
if (backend) {
if (kcpp_main_gpu != "") {
std::string dev_name = kcpp_sd_get_device_name();
if (dev_name != "") {
backend = init_named_backend(dev_name);
LOG_INFO("Setting %s as main device (#%d)", dev_name.c_str(), kcpp_main_gpu);
} else {
LOG_WARN("Couldn't initialize device #%d; falling back to the default device", kcpp_main_gpu);
}
}
return backend;
Expand Down
1 change: 1 addition & 0 deletions otherarch/sdcpp/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ bool sd_should_preview_noisy();

// test if the backend is a specific one, e.g. "CUDA", "ROCm", "Vulkan" etc.
bool sd_backend_is(ggml_backend_t backend, const std::string& name);
std::string kcpp_sd_get_device_name(const std::string& mod = "");
ggml_backend_t sd_get_default_backend();

void log_message(const char* format, ...);
Expand Down