|
| 1 | +#include <cstddef> |
| 2 | +#include <cstdint> |
| 3 | +#include "ggml.h" |
| 4 | + |
// Latent -> RGB preview factors, `wan_21` set.
// 16 rows x 3 columns: row d holds the {R, G, B} weights for latent channel d,
// which is the layout preview_latent_video() indexes as latent_rgb_proj[d][c].
const float wan_21_latent_rgb_proj[16][3] = {
    {0.015123f, -0.148418f, 0.479828f},
    {0.003652f, -0.010680f, -0.037142f},
    {0.212264f, 0.063033f, 0.016779f},
    {0.232999f, 0.406476f, 0.220125f},
    {-0.051864f, -0.082384f, -0.069396f},
    {0.085005f, -0.161492f, 0.010689f},
    {-0.245369f, -0.506846f, -0.117010f},
    {-0.151145f, 0.017721f, 0.007207f},
    {-0.293239f, -0.207936f, -0.421135f},
    {-0.187721f, 0.050783f, 0.177649f},
    {-0.013067f, 0.265964f, 0.166578f},
    {0.028327f, 0.109329f, 0.108642f},
    {-0.205343f, 0.043991f, 0.148914f},
    {0.014307f, -0.048647f, -0.007219f},
    {0.217150f, 0.053074f, 0.319923f},
    {0.155357f, 0.083156f, 0.064780f},
};

// {R, G, B} offsets that accompany the `wan_21` projection table.
// NOTE(review): not const, unlike the projection table - confirm whether any
// caller needs to mutate it.
float wan_21_latent_rgb_bias[3] = {
    -0.270270f,
    -0.234976f,
    -0.456853f,
};
| 23 | + |
// Latent -> RGB preview factors, `wan_22` set.
// 48 rows x 3 columns: row d holds the {R, G, B} weights for latent channel d,
// which is the layout preview_latent_video() indexes as latent_rgb_proj[d][c].
const float wan_22_latent_rgb_proj[48][3] = {
    {0.017126f, -0.027230f, -0.019257f},
    {-0.113739f, -0.028715f, -0.022885f},
    {-0.000106f, 0.021494f, 0.004629f},
    {-0.013273f, -0.107137f, -0.033638f},
    {-0.000381f, 0.000279f, 0.025877f},
    {-0.014216f, -0.003975f, 0.040528f},
    {0.001638f, -0.000748f, 0.011022f},
    {0.029238f, -0.006697f, 0.035933f},
    {0.021641f, -0.015874f, 0.040531f},
    {-0.101984f, -0.070160f, -0.028855f},
    {0.033207f, -0.021068f, 0.002663f},
    {-0.104711f, 0.121673f, 0.102981f},
    {0.082647f, -0.004991f, 0.057237f},
    {-0.027375f, 0.031581f, 0.006868f},
    {-0.045434f, 0.029444f, 0.019287f},
    {-0.046572f, -0.012537f, 0.006675f},
    {0.074709f, 0.033690f, 0.025289f},
    {-0.008251f, -0.002745f, -0.006999f},
    {0.012685f, -0.061856f, -0.048658f},
    {0.042304f, -0.007039f, 0.000295f},
    {-0.007644f, -0.060843f, -0.033142f},
    {0.159909f, 0.045628f, 0.367541f},
    {0.095171f, 0.086438f, 0.010271f},
    {0.006812f, 0.019643f, 0.029637f},
    {0.003467f, -0.010705f, 0.014252f},
    {-0.099681f, -0.066272f, -0.006243f},
    {0.047357f, 0.037040f, 0.000185f},
    {-0.041797f, -0.089225f, -0.032257f},
    {0.008928f, 0.017028f, 0.018684f},
    {-0.042255f, 0.016045f, 0.006849f},
    {0.011268f, 0.036462f, 0.037387f},
    {0.011553f, -0.016375f, -0.048589f},
    {0.046266f, -0.027189f, 0.056979f},
    {0.009640f, -0.017576f, 0.030324f},
    {-0.045794f, -0.036083f, -0.010616f},
    {0.022418f, 0.039783f, -0.032939f},
    {-0.052714f, -0.015525f, 0.007438f},
    {0.193004f, 0.223541f, 0.264175f},
    {-0.059406f, -0.008188f, 0.022867f},
    {-0.156742f, -0.263791f, -0.007385f},
    {-0.015717f, 0.016570f, 0.033969f},
    {0.037969f, 0.109835f, 0.200449f},
    {-0.000782f, -0.009566f, -0.008058f},
    {0.010709f, 0.052960f, -0.044195f},
    {0.017271f, 0.045839f, 0.034569f},
    {0.009424f, 0.013088f, -0.001714f},
    {-0.024805f, -0.059378f, -0.033756f},
    {-0.078293f, 0.029070f, 0.026129f},
};

// {R, G, B} offsets that accompany the `wan_22` projection table.
// NOTE(review): not const, unlike the projection table - confirm whether any
// caller needs to mutate it.
float wan_22_latent_rgb_bias[3] = {
    0.013160f,
    -0.096492f,
    -0.071323f,
};
| 74 | + |
// Latent -> RGB preview factors, `flux` set.
// 16 rows x 3 columns: row d holds the {R, G, B} weights for latent channel d,
// which is the layout preview_latent_video() indexes as latent_rgb_proj[d][c].
const float flux_latent_rgb_proj[16][3] = {
    {-0.041168f, 0.019917f, 0.097253f},
    {0.028096f, 0.026730f, 0.129576f},
    {0.065618f, -0.067950f, -0.014651f},
    {-0.012998f, -0.014762f, 0.081251f},
    {0.078567f, 0.059296f, -0.024687f},
    {-0.015987f, -0.003697f, 0.005012f},
    {0.033605f, 0.138999f, 0.068517f},
    {-0.024450f, -0.063567f, -0.030101f},
    {-0.040194f, -0.016710f, 0.127185f},
    {0.112681f, 0.088764f, -0.041940f},
    {-0.023498f, 0.093664f, 0.025543f},
    {0.082899f, 0.048320f, 0.007491f},
    {0.075712f, 0.074139f, 0.081965f},
    {-0.143501f, 0.018263f, -0.136138f},
    {-0.025767f, -0.082035f, -0.040023f},
    {-0.111849f, -0.055589f, -0.032361f},
};

// {R, G, B} offsets that accompany the `flux` projection table.
// NOTE(review): not const, unlike the projection table - confirm whether any
// caller needs to mutate it.
float flux_latent_rgb_bias[3] = {
    0.024600f,
    -0.006937f,
    -0.008089f,
};
| 93 | + |
// Latent -> RGB preview factors, `sd3` set.
// Values copied verbatim from
// https://github.com/Stability-AI/sd3.5/blob/8565799a3b41eb0c7ba976d18375f0f753f56402/sd3_impls.py#L288-L303
// (MIT License).
// 16 rows x 3 columns: row d holds the {R, G, B} weights for latent channel d,
// which is the layout preview_latent_video() indexes as latent_rgb_proj[d][c].
const float sd3_latent_rgb_proj[16][3] = {
    {-0.0645f, 0.0177f, 0.1052f},
    {0.0028f, 0.0312f, 0.0650f},
    {0.1848f, 0.0762f, 0.0360f},
    {0.0944f, 0.0360f, 0.0889f},
    {0.0897f, 0.0506f, -0.0364f},
    {-0.0020f, 0.1203f, 0.0284f},
    {0.0855f, 0.0118f, 0.0283f},
    {-0.0539f, 0.0658f, 0.1047f},
    {-0.0057f, 0.0116f, 0.0700f},
    {-0.0412f, 0.0281f, -0.0039f},
    {0.1106f, 0.1171f, 0.1220f},
    {-0.0248f, 0.0682f, -0.0481f},
    {0.0815f, 0.0846f, 0.1207f},
    {-0.0120f, -0.0055f, -0.0867f},
    {-0.0749f, -0.0634f, -0.0456f},
    {-0.1418f, -0.1457f, -0.1259f},
};

// The `sd3` set carries no offset: all three components are zero.
// NOTE(review): not const, unlike the projection table - confirm whether any
// caller needs to mutate it.
float sd3_latent_rgb_bias[3] = {0.0f, 0.0f, 0.0f};
| 116 | + |
// Latent -> RGB preview factors, `sdxl` set.
// 4 rows x 3 columns: row d holds the {R, G, B} weights for latent channel d,
// which is the layout preview_latent_video() indexes as latent_rgb_proj[d][c].
const float sdxl_latent_rgb_proj[4][3] = {
    {0.258303f, 0.277640f, 0.329699f},
    {-0.299701f, 0.105446f, 0.014194f},
    {0.050522f, 0.186163f, -0.143257f},
    {-0.211938f, -0.149892f, -0.080036f},
};

// {R, G, B} offsets that accompany the `sdxl` projection table.
// NOTE(review): not const, unlike the projection table - confirm whether any
// caller needs to mutate it.
float sdxl_latent_rgb_bias[3] = {
    0.144381f,
    -0.033313f,
    0.007061f,
};
| 123 | + |
// Latent -> RGB preview factors, `sd` set.
// 4 rows x 3 columns: row d holds the {R, G, B} weights for latent channel d,
// which is the layout preview_latent_video() indexes as latent_rgb_proj[d][c].
const float sd_latent_rgb_proj[4][3] = {
    {0.337366f, 0.216344f, 0.257386f},
    {0.165636f, 0.386828f, 0.046994f},
    {-0.267803f, 0.237036f, 0.223517f},
    {-0.178022f, -0.200862f, -0.678514f},
};

// {R, G, B} offsets that accompany the `sd` projection table.
// NOTE(review): not const, unlike the projection table - confirm whether any
// caller needs to mutate it.
float sd_latent_rgb_bias[3] = {
    -0.017478f,
    -0.055834f,
    -0.105825f,
};
| 130 | + |
| 131 | +void preview_latent_video(uint8_t* buffer, struct ggml_tensor* latents, const float (*latent_rgb_proj)[3], const float latent_rgb_bias[3], int width, int height, int frames, int dim) { |
| 132 | + size_t buffer_head = 0; |
| 133 | + for (int k = 0; k < frames; k++) { |
| 134 | + for (int j = 0; j < height; j++) { |
| 135 | + for (int i = 0; i < width; i++) { |
| 136 | + size_t latent_id = (i * latents->nb[0] + j * latents->nb[1] + k * latents->nb[2]); |
| 137 | + float r = 0, g = 0, b = 0; |
| 138 | + if (latent_rgb_proj != nullptr) { |
| 139 | + for (int d = 0; d < dim; d++) { |
| 140 | + float value = *(float*)((char*)latents->data + latent_id + d * latents->nb[ggml_n_dims(latents) - 1]); |
| 141 | + r += value * latent_rgb_proj[d][0]; |
| 142 | + g += value * latent_rgb_proj[d][1]; |
| 143 | + b += value * latent_rgb_proj[d][2]; |
| 144 | + } |
| 145 | + } else { |
| 146 | + // interpret first 3 channels as RGB |
| 147 | + r = *(float*)((char*)latents->data + latent_id + 0 * latents->nb[ggml_n_dims(latents) - 1]); |
| 148 | + g = *(float*)((char*)latents->data + latent_id + 1 * latents->nb[ggml_n_dims(latents) - 1]); |
| 149 | + b = *(float*)((char*)latents->data + latent_id + 2 * latents->nb[ggml_n_dims(latents) - 1]); |
| 150 | + } |
| 151 | + if (latent_rgb_bias != nullptr) { |
| 152 | + // bias |
| 153 | + r += latent_rgb_bias[0]; |
| 154 | + g += latent_rgb_bias[1]; |
| 155 | + b += latent_rgb_bias[2]; |
| 156 | + } |
| 157 | + // change range |
| 158 | + r = r * .5f + .5f; |
| 159 | + g = g * .5f + .5f; |
| 160 | + b = b * .5f + .5f; |
| 161 | + |
| 162 | + // clamp rgb values to [0,1] range |
| 163 | + r = r >= 0 ? r <= 1 ? r : 1 : 0; |
| 164 | + g = g >= 0 ? g <= 1 ? g : 1 : 0; |
| 165 | + b = b >= 0 ? b <= 1 ? b : 1 : 0; |
| 166 | + |
| 167 | + buffer[buffer_head++] = (uint8_t)(r * 255); |
| 168 | + buffer[buffer_head++] = (uint8_t)(g * 255); |
| 169 | + buffer[buffer_head++] = (uint8_t)(b * 255); |
| 170 | + } |
| 171 | + } |
| 172 | + } |
| 173 | +} |
0 commit comments