Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion examples/cli/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,9 @@ void parse_args(int argc, const char** argv, SDCliParams& cli_params, SDContextP
bool valid = cli_params.resolve_and_validate();
if (valid && cli_params.mode != METADATA) {
valid = ctx_params.resolve_and_validate(cli_params.mode) &&
gen_params.resolve_and_validate(cli_params.mode, ctx_params.lora_model_dir);
gen_params.resolve_and_validate(cli_params.mode,
ctx_params.lora_model_dir,
ctx_params.hires_upscalers_dir);
}

if (!valid) {
Expand Down Expand Up @@ -688,6 +690,10 @@ int main(int argc, const char* argv[]) {
vae_decode_only = false;
}

if (gen_params.hires_enabled && !gen_params.hires_upscaler_model_path.empty()) {
vae_decode_only = false;
}

sd_ctx_params_t sd_ctx_params = ctx_params.to_sd_ctx_params_t(vae_decode_only, true, cli_params.taesd_preview);

SDImageVec results;
Expand Down
177 changes: 173 additions & 4 deletions examples/common/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,10 @@ ArgOptions SDContextParams::get_options() {
"--lora-model-dir",
"lora model directory",
&lora_model_dir},

{"",
"--hires-upscalers-dir",
"highres fix upscaler model directory",
&hires_upscalers_dir},
{"",
"--tensor-type-rules",
"weight type per tensor pattern (example: \"^vae\\.=f16,model\\.=q8_0\")",
Expand Down Expand Up @@ -649,6 +652,7 @@ std::string SDContextParams::to_string() const {
<< " wtype: " << sd_type_name(wtype) << ",\n"
<< " tensor_type_rules: \"" << tensor_type_rules << "\",\n"
<< " lora_model_dir: \"" << lora_model_dir << "\",\n"
<< " hires_upscalers_dir: \"" << hires_upscalers_dir << "\",\n"
<< " photo_maker_path: \"" << photo_maker_path << "\",\n"
<< " rng_type: " << sd_rng_type_name(rng_type) << ",\n"
<< " sampler_rng_type: " << sd_rng_type_name(sampler_rng_type) << ",\n"
Expand Down Expand Up @@ -777,6 +781,10 @@ ArgOptions SDGenerationParams::get_options() {
"--pm-id-embed-path",
"path to PHOTOMAKER v2 id embed",
&pm_id_embed_path},
{"",
"--hires-upscaler",
"highres fix upscaler, Latent (nearest) or a model name/path under --hires-upscalers-dir (default: Latent (nearest))",
&hires_upscaler},
};

options.int_options = {
Expand Down Expand Up @@ -826,6 +834,22 @@ ArgOptions SDGenerationParams::get_options() {
"--upscale-tile-size",
"tile size for ESRGAN upscaling (default: 128)",
&upscale_tile_size},
{"",
"--hires-width",
"highres fix target width, 0 to use --hires-scale (default: 0)",
&hires_width},
{"",
"--hires-height",
"highres fix target height, 0 to use --hires-scale (default: 0)",
&hires_height},
{"",
"--hires-steps",
"highres fix second pass sample steps, 0 to reuse --steps (default: 0)",
&hires_steps},
{"",
"--hires-upscale-tile-size",
"highres fix upscaler tile size, reserved for model-backed upscalers (default: 128)",
&hires_upscale_tile_size},
};

options.float_options = {
Expand Down Expand Up @@ -913,6 +937,14 @@ ArgOptions SDGenerationParams::get_options() {
"--vae-tile-overlap",
"tile overlap for vae tiling, in fraction of tile size (default: 0.5)",
&vae_tiling_params.target_overlap},
{"",
"--hires-scale",
"highres fix scale when target size is not set (default: 2.0)",
&hires_scale},
{"",
"--hires-denoising-strength",
"highres fix second pass denoising strength (default: 0.7)",
&hires_denoising_strength},
};

options.bool_options = {
Expand All @@ -936,6 +968,11 @@ ArgOptions SDGenerationParams::get_options() {
"process vae in tiles to reduce memory usage",
true,
&vae_tiling_params.enabled},
{"",
"--hires",
"enable highres fix",
true,
&hires_enabled},
};

auto on_seed_arg = [&](int argc, const char** argv, int index) {
Expand Down Expand Up @@ -1424,6 +1461,37 @@ static bool parse_lora_json_field(const json& parent,
return true;
}

static bool resolve_model_file_from_dir(const std::string& model_name,
const std::string& model_dir,
const std::vector<std::string>& valid_ext,
const char* label,
std::string& resolved_path) {
if (model_dir.empty()) {
LOG_ERROR("%s directory is empty", label);
return false;
}
if (model_name.empty() ||
model_name.find('/') != std::string::npos ||
model_name.find('\\') != std::string::npos ||
fs::path(model_name).has_root_path() ||
fs::path(model_name).has_extension()) {
LOG_ERROR("%s must be a model name without path or extension: %s", label, model_name.c_str());
return false;
}

fs::path model_dir_path = model_dir;
for (const auto& ext : valid_ext) {
fs::path try_path = model_dir_path / (model_name + ext);
if (fs::exists(try_path) && fs::is_regular_file(try_path)) {
resolved_path = try_path.lexically_normal().string();
return true;
}
}

LOG_ERROR("can not find %s %s in %s", label, model_name.c_str(), model_dir_path.lexically_normal().string().c_str());
return false;
}

bool SDGenerationParams::from_json_str(
const std::string& json_str,
const std::function<std::string(const std::string&)>& lora_path_resolver) {
Expand Down Expand Up @@ -1487,6 +1555,34 @@ bool SDGenerationParams::from_json_str(
load_if_exists("increase_ref_index", increase_ref_index);
load_if_exists("embed_image_metadata", embed_image_metadata);

if (j.contains("hires") && j["hires"].is_object()) {
const json& hires_json = j["hires"];
if (hires_json.contains("enabled") && hires_json["enabled"].is_boolean()) {
hires_enabled = hires_json["enabled"];
}
if (hires_json.contains("upscaler") && hires_json["upscaler"].is_string()) {
hires_upscaler = hires_json["upscaler"];
}
if (hires_json.contains("scale") && hires_json["scale"].is_number()) {
hires_scale = hires_json["scale"];
}
if (hires_json.contains("target_width") && hires_json["target_width"].is_number_integer()) {
hires_width = hires_json["target_width"];
}
if (hires_json.contains("target_height") && hires_json["target_height"].is_number_integer()) {
hires_height = hires_json["target_height"];
}
if (hires_json.contains("steps") && hires_json["steps"].is_number_integer()) {
hires_steps = hires_json["steps"];
}
if (hires_json.contains("denoising_strength") && hires_json["denoising_strength"].is_number()) {
hires_denoising_strength = hires_json["denoising_strength"];
}
if (hires_json.contains("upscale_tile_size") && hires_json["upscale_tile_size"].is_number_integer()) {
hires_upscale_tile_size = hires_json["upscale_tile_size"];
}
}

auto parse_sample_params_json = [&](const json& sample_json,
sd_sample_params_t& target_params,
std::vector<int>& target_skip_layers,
Expand Down Expand Up @@ -1800,7 +1896,7 @@ bool SDGenerationParams::initialize_cache_params() {
return true;
}

bool SDGenerationParams::resolve(const std::string& lora_model_dir, bool strict) {
bool SDGenerationParams::resolve(const std::string& lora_model_dir, const std::string& hires_upscalers_dir, bool strict) {
if (high_noise_sample_params.sample_steps <= 0) {
high_noise_sample_params.sample_steps = -1;
}
Expand All @@ -1819,6 +1915,27 @@ bool SDGenerationParams::resolve(const std::string& lora_model_dir, bool strict)
sample_params.sample_steps = std::clamp(sample_params.sample_steps, 1, 100);
}

hires_upscaler_model_path.clear();
if (hires_enabled) {
if (hires_upscaler.empty()) {
hires_upscaler = "Latent (nearest)";
}
resolved_hires_upscaler = str_to_sd_hires_upscaler(hires_upscaler.c_str());
if (resolved_hires_upscaler == SD_HIRES_UPSCALER_NONE) {
hires_enabled = false;
} else if (resolved_hires_upscaler == SD_HIRES_UPSCALER_COUNT) {
static const std::vector<std::string> valid_ext = {".gguf", ".safetensors", ".pt", ".pth"};
if (!resolve_model_file_from_dir(hires_upscaler,
hires_upscalers_dir,
valid_ext,
"hires upscaler",
hires_upscaler_model_path)) {
return false;
}
resolved_hires_upscaler = SD_HIRES_UPSCALER_MODEL;
}
}

prompt_with_lora = prompt;
if (!lora_model_dir.empty()) {
extract_and_remove_lora(lora_model_dir);
Expand Down Expand Up @@ -1883,6 +2000,29 @@ bool SDGenerationParams::validate(SDMode mode) {
return false;
}

if (hires_enabled) {
if (hires_width < 0 || hires_height < 0) {
LOG_ERROR("error: hires target width and height must be >= 0");
return false;
}
if (hires_scale <= 0.f && hires_width <= 0 && hires_height <= 0) {
LOG_ERROR("error: hires scale must be positive when target size is not set");
return false;
}
if (hires_steps < 0) {
LOG_ERROR("error: hires steps must be >= 0");
return false;
}
if (hires_denoising_strength <= 0.f || hires_denoising_strength > 1.f) {
LOG_ERROR("error: hires denoising strength must be in (0.0, 1.0]");
return false;
}
if (hires_upscale_tile_size < 1) {
LOG_ERROR("error: hires upscale tile size must be positive");
return false;
}
}

if (mode == UPSCALE) {
if (init_image_path.length() == 0) {
LOG_ERROR("error: upscale mode needs an init image (--init-img)\n");
Expand All @@ -1893,8 +2033,11 @@ bool SDGenerationParams::validate(SDMode mode) {
return true;
}

bool SDGenerationParams::resolve_and_validate(SDMode mode, const std::string& lora_model_dir, bool strict) {
if (!resolve(lora_model_dir, strict)) {
bool SDGenerationParams::resolve_and_validate(SDMode mode,
const std::string& lora_model_dir,
const std::string& hires_upscalers_dir,
bool strict) {
if (!resolve(lora_model_dir, hires_upscalers_dir, strict)) {
return false;
}
if (!validate(mode)) {
Expand Down Expand Up @@ -1965,6 +2108,16 @@ sd_img_gen_params_t SDGenerationParams::to_sd_img_gen_params_t() {
params.pm_params = pm_params;
params.vae_tiling_params = vae_tiling_params;
params.cache = cache_params;

params.hires.enabled = hires_enabled;
params.hires.upscaler = resolved_hires_upscaler;
params.hires.model_path = hires_upscaler_model_path.empty() ? nullptr : hires_upscaler_model_path.c_str();
params.hires.scale = hires_scale;
params.hires.target_width = hires_width;
params.hires.target_height = hires_height;
params.hires.steps = hires_steps;
params.hires.denoising_strength = hires_denoising_strength;
params.hires.upscale_tile_size = hires_upscale_tile_size;
return params;
}

Expand Down Expand Up @@ -2089,6 +2242,15 @@ std::string SDGenerationParams::to_string() const {
<< " seed: " << seed << ",\n"
<< " upscale_repeats: " << upscale_repeats << ",\n"
<< " upscale_tile_size: " << upscale_tile_size << ",\n"
<< " hires: { enabled: " << (hires_enabled ? "true" : "false")
<< ", upscaler: \"" << hires_upscaler << "\""
<< ", model_path: \"" << hires_upscaler_model_path << "\""
<< ", scale: " << hires_scale
<< ", target_width: " << hires_width
<< ", target_height: " << hires_height
<< ", steps: " << hires_steps
<< ", denoising_strength: " << hires_denoising_strength
<< ", upscale_tile_size: " << hires_upscale_tile_size << " },\n"
<< " vae_tiling_params: { "
<< vae_tiling_params.enabled << ", "
<< vae_tiling_params.tile_size_x << ", "
Expand Down Expand Up @@ -2162,6 +2324,13 @@ std::string get_image_params(const SDContextParams& ctx_params, const SDGenerati
if (gen_params.clip_skip != -1) {
parameter_string += "Clip skip: " + std::to_string(gen_params.clip_skip) + ", ";
}
if (gen_params.hires_enabled) {
parameter_string += "Hires upscale: " + gen_params.hires_upscaler + ", ";
parameter_string += "Hires scale: " + std::to_string(gen_params.hires_scale) + ", ";
parameter_string += "Hires resize: " + std::to_string(gen_params.hires_width) + "x" + std::to_string(gen_params.hires_height) + ", ";
parameter_string += "Hires steps: " + std::to_string(gen_params.hires_steps) + ", ";
parameter_string += "Denoising strength: " + std::to_string(gen_params.hires_denoising_strength) + ", ";
}
parameter_string += "Version: stable-diffusion.cpp";
return parameter_string;
}
19 changes: 17 additions & 2 deletions examples/common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ struct SDContextParams {
sd_type_t wtype = SD_TYPE_COUNT;
std::string tensor_type_rules;
std::string lora_model_dir = ".";
std::string hires_upscalers_dir;

std::map<std::string, std::string> embedding_map;
std::vector<sd_embedding_t> embedding_vec;
Expand Down Expand Up @@ -190,12 +191,23 @@ struct SDGenerationParams {
int upscale_repeats = 1;
int upscale_tile_size = 128;

bool hires_enabled = false;
std::string hires_upscaler = "Latent (nearest)";
std::string hires_upscaler_model_path;
float hires_scale = 2.f;
int hires_width = 0;
int hires_height = 0;
int hires_steps = 0;
float hires_denoising_strength = 0.7f;
int hires_upscale_tile_size = 128;

std::map<std::string, float> lora_map;
std::map<std::string, float> high_noise_lora_map;

// Derived and normalized fields.
std::string prompt_with_lora; // for metadata record only
std::vector<sd_lora_t> lora_vec;
// Set by resolve() only while hires is enabled, but copied unconditionally by
// to_sd_img_gen_params_t(); default it so it is never read uninitialized.
sd_hires_upscaler_t resolved_hires_upscaler = SD_HIRES_UPSCALER_NONE;

// Owned execution payload.
SDImageOwner init_image;
Expand Down Expand Up @@ -225,9 +237,12 @@ struct SDGenerationParams {
void set_width_and_height_if_unset(int w, int h);
int get_resolved_width() const;
int get_resolved_height() const;
bool resolve(const std::string& lora_model_dir, bool strict = false);
bool resolve(const std::string& lora_model_dir, const std::string& hires_upscalers_dir, bool strict = false);
bool validate(SDMode mode);
bool resolve_and_validate(SDMode mode, const std::string& lora_model_dir, bool strict = false);
bool resolve_and_validate(SDMode mode,
const std::string& lora_model_dir,
const std::string& hires_upscalers_dir,
bool strict = false);
sd_img_gen_params_t to_sd_img_gen_params_t();
sd_vid_gen_params_t to_sd_vid_gen_params_t();
std::string to_string() const;
Expand Down
Loading
Loading