diff --git a/CMakeLists.txt b/CMakeLists.txt
index e54fc988d..6a9fb1041 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -156,10 +156,12 @@ endif()
 
 set(SD_LIB stable-diffusion)
 
-file(GLOB SD_LIB_SOURCES
+file(GLOB SD_LIB_SOURCES CONFIGURE_DEPENDS
     "src/*.h"
     "src/*.cpp"
     "src/*.hpp"
+    "src/model_io/*.h"
+    "src/model_io/*.cpp"
     "src/tokenizers/*.h"
     "src/tokenizers/*.cpp"
     "src/tokenizers/vocab/*.h"
diff --git a/format-code.sh b/format-code.sh
index 5c30fb4ff..8aa422bca 100644
--- a/format-code.sh
+++ b/format-code.sh
@@ -1,5 +1,5 @@
 for f in src/*.cpp src/*.h src/*.hpp src/tokenizers/*.h src/tokenizers/*.cpp src/tokenizers/vocab/*.h src/tokenizers/vocab/*.cpp \
-         examples/cli/*.cpp examples/cli/*.h examples/server/*.cpp \
+         src/model_io/*.h src/model_io/*.cpp examples/cli/*.cpp examples/cli/*.h examples/server/*.cpp \
          examples/common/*.hpp examples/common/*.h examples/common/*.cpp; do
   [[ "$f" == vocab* ]] && continue
   echo "formatting '$f'"
diff --git a/src/denoiser.hpp b/src/denoiser.hpp
index 35093e10d..6c2d9cc62 100644
--- a/src/denoiser.hpp
+++ b/src/denoiser.hpp
@@ -977,7 +977,7 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral_flow(denoise_cb_t model,
                                                         float eta = 1.0f) {
     int steps = static_cast<int>(sigmas.size()) - 1;
     for (int i = 0; i < steps; i++) {
-        float sigma = sigmas[i];
+        float sigma    = sigmas[i];
         float sigma_to = sigmas[i + 1];
 
         bool opt_first_step = (1.0 - sigma < 1e-6);
@@ -1040,10 +1040,10 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral_flow(denoise_cb_t model,
                 // and sigma_s = sigma_fn(s) = 1.0f / (exp(s) + 1.0f)
 
                 float exp_s = std::sqrt(((1 - sigma) / sigma) * ((1 - sigma_down) / sigma_down));
-                sigma_s = 1.0f / (exp_s + 1.0f);
+                sigma_s     = 1.0f / (exp_s + 1.0f);
 
                 float sigma_s_i_ratio = sigma_s / sigma;
-                sd::Tensor<float> u = (x * sigma_s_i_ratio) + (denoised * (1.0f - sigma_s_i_ratio));
+                sd::Tensor<float> u   = (x * sigma_s_i_ratio) + (denoised * (1.0f - sigma_s_i_ratio));
 
                 auto denoised2_opt = model(u, sigma_s, i + 1);
                 if (denoised2_opt.empty()) {
@@ -1053,7 +1053,7 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral_flow(denoise_cb_t model,
             }
 
             float sigma_down_i_ratio = sigma_down / sigma;
-            x = (x * sigma_down_i_ratio) + (D_i * (1.0f - sigma_down_i_ratio));
+            x                        = (x * sigma_down_i_ratio) + (D_i * (1.0f - sigma_down_i_ratio));
 
             if (sigma_to > 0.0f && eta > 0.0f) {
                 x = alpha_scale * x + sd::Tensor<float>::randn_like(x, rng) * sigma_up;
@@ -1064,8 +1064,6 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral_flow(denoise_cb_t model,
     return x;
 }
 
-
-
 static sd::Tensor<float> sample_dpmpp_2m(denoise_cb_t model,
                                          sd::Tensor<float> x,
                                          const std::vector<float>& sigmas) {
diff --git a/src/model.cpp b/src/model.cpp
index 67b54d063..2594267fb 100644
--- a/src/model.cpp
+++ b/src/model.cpp
@@ -12,8 +12,10 @@
 #include <unordered_map>
 #include <vector>
 
-#include "gguf_reader.hpp"
 #include "model.h"
+#include "model_io/ckpt_io.h"
+#include "model_io/gguf_io.h"
+#include "model_io/safetensors_io.h"
 #include "stable-diffusion.h"
 #include "util.h"
 
@@ -21,6 +23,7 @@
 #include "ggml-backend.h"
 #include "ggml-cpu.h"
 #include "ggml.h"
+#include "zip.h"
 
 #include "name_conversion.h"
 #include "stable-diffusion.h"
@@ -37,40 +40,6 @@
 #include "ggml-opencl.h"
 #endif
 
-#define ST_HEADER_SIZE_LEN 8
-
-uint64_t read_u64(uint8_t* buffer) {
-    // little endian
-    uint64_t value = 0;
-    value |= static_cast<int64_t>(buffer[7]) << 56;
-    value |= static_cast<int64_t>(buffer[6]) << 48;
-    value |= static_cast<int64_t>(buffer[5]) << 40;
-    value |= static_cast<int64_t>(buffer[4]) << 32;
-    value |= static_cast<int64_t>(buffer[3]) << 24;
-    value |= static_cast<int64_t>(buffer[2]) << 16;
-    value |= static_cast<int64_t>(buffer[1]) << 8;
-    value |= static_cast<int64_t>(buffer[0]);
-    return value;
-}
-
-int32_t read_int(uint8_t* buffer) {
-    // little endian
-    int value = 0;
-    value |= buffer[3] << 24;
-    value |= buffer[2] << 16;
-    value |= buffer[1] << 8;
-    value |= buffer[0];
-    return value;
-}
-
-uint16_t read_short(uint8_t* buffer) {
-    // little endian
-    uint16_t value = 0;
-    value |= buffer[1] << 8;
-    value |= buffer[0];
-    return value;
-}
-
 /*================================================= Preprocess ==================================================*/
 
 const char* unused_tensors[] = {
@@ -250,79 +219,6 @@ void ModelLoader::add_tensor_storage(const TensorStorage& tensor_storage) {
     tensor_storage_map[tensor_storage.name] = tensor_storage;
 }
 
-bool is_zip_file(const std::string& file_path) {
-    zip_t* zip = zip_open(file_path.c_str(), 0, 'r');
-    if (zip == nullptr) {
-        return false;
-    }
-    zip_close(zip);
-    return true;
-}
-
-bool is_gguf_file(const std::string& file_path) {
-    std::ifstream file(file_path, std::ios::binary);
-    if (!file.is_open()) {
-        return false;
-    }
-
-    char magic[4];
-
-    file.read(magic, sizeof(magic));
-    if (!file) {
-        return false;
-    }
-    for (uint32_t i = 0; i < sizeof(magic); i++) {
-        if (magic[i] != GGUF_MAGIC[i]) {
-            return false;
-        }
-    }
-
-    return true;
-}
-
-bool is_safetensors_file(const std::string& file_path) {
-    std::ifstream file(file_path, std::ios::binary);
-    if (!file.is_open()) {
-        return false;
-    }
-
-    // get file size
-    file.seekg(0, file.end);
-    size_t file_size_ = file.tellg();
-    file.seekg(0, file.beg);
-
-    // read header size
-    if (file_size_ <= ST_HEADER_SIZE_LEN) {
-        return false;
-    }
-
-    uint8_t header_size_buf[ST_HEADER_SIZE_LEN];
-    file.read((char*)header_size_buf, ST_HEADER_SIZE_LEN);
-    if (!file) {
-        return false;
-    }
-
-    size_t header_size_ = read_u64(header_size_buf);
-    if (header_size_ >= file_size_ || header_size_ <= 2) {
-        return false;
-    }
-
-    // read header
-    std::vector<char> header_buf;
-    header_buf.resize(header_size_ + 1);
-    header_buf[header_size_] = '\0';
-    file.read(header_buf.data(), header_size_);
-    if (!file) {
-        return false;
-    }
-    try {
-        nlohmann::json header_ = nlohmann::json::parse(header_buf.data());
-    } catch (const std::exception&) {
-        return false;
-    }
-    return true;
-}
-
 bool ModelLoader::init_from_file(const std::string& file_path, const std::string& prefix) {
     if (is_directory(file_path)) {
         LOG_INFO("load %s using diffusers format", file_path.c_str());
@@ -333,7 +229,7 @@ bool ModelLoader::init_from_file(const std::string& file_path, const std::string
     } else if (is_safetensors_file(file_path)) {
         LOG_INFO("load %s using safetensors format", file_path.c_str());
         return init_from_safetensors_file(file_path, prefix);
-    } else if (is_zip_file(file_path)) {
+    } else if (is_ckpt_file(file_path)) {
         LOG_INFO("load %s using checkpoint format", file_path.c_str());
         return init_from_ckpt_file(file_path, prefix);
     } else {
@@ -375,242 +271,59 @@ bool ModelLoader::init_from_file_and_convert_name(const std::string& file_path,
 
 bool ModelLoader::init_from_gguf_file(const std::string& file_path, const std::string& prefix) {
     LOG_DEBUG("init from '%s'", file_path.c_str());
-    file_paths_.push_back(file_path);
-    size_t file_index = file_paths_.size() - 1;
-
-    gguf_context* ctx_gguf_ = nullptr;
-    ggml_context* ctx_meta_ = nullptr;
-
-    ctx_gguf_ = gguf_init_from_file(file_path.c_str(), {true, &ctx_meta_});
-    if (!ctx_gguf_) {
-        LOG_ERROR("failed to open '%s' with gguf_init_from_file. Try to open it with GGUFReader.", file_path.c_str());
-        GGUFReader gguf_reader;
-        if (!gguf_reader.load(file_path)) {
-            LOG_ERROR("failed to open '%s' with GGUFReader.", file_path.c_str());
-            return false;
-        }
-
-        size_t data_offset = gguf_reader.data_offset();
-        for (const auto& gguf_tensor_info : gguf_reader.tensors()) {
-            std::string name = gguf_tensor_info.name;
-            if (!starts_with(name, prefix)) {
-                name = prefix + name;
-            }
-
-            TensorStorage tensor_storage(
-                name,
-                gguf_tensor_info.type,
-                gguf_tensor_info.shape.data(),
-                static_cast<int>(gguf_tensor_info.shape.size()),
-                file_index,
-                data_offset + gguf_tensor_info.offset);
 
-            // LOG_DEBUG("%s %s", name.c_str(), tensor_storage.to_string().c_str());
-
-            add_tensor_storage(tensor_storage);
-        }
-
-        return true;
+    std::vector<TensorStorage> tensor_storages;
+    std::string error;
+    if (!read_gguf_file(file_path, tensor_storages, &error)) {
+        LOG_ERROR("%s", error.c_str());
+        return false;
     }
 
-    int n_tensors = static_cast<int>(gguf_get_n_tensors(ctx_gguf_));
-
-    size_t total_size  = 0;
-    size_t data_offset = gguf_get_data_offset(ctx_gguf_);
-    for (int i = 0; i < n_tensors; i++) {
-        std::string name   = gguf_get_tensor_name(ctx_gguf_, i);
-        ggml_tensor* dummy = ggml_get_tensor(ctx_meta_, name.c_str());
-        size_t offset      = data_offset + gguf_get_tensor_offset(ctx_gguf_, i);
+    file_paths_.push_back(file_path);
+    size_t file_index = file_paths_.size() - 1;
 
-        // LOG_DEBUG("%s", name.c_str());
+    for (auto& tensor_storage : tensor_storages) {
+        // LOG_DEBUG("%s", tensor_storage.name.c_str());
 
-        if (!starts_with(name, prefix)) {
-            name = prefix + name;
+        if (!starts_with(tensor_storage.name, prefix)) {
+            tensor_storage.name = prefix + tensor_storage.name;
         }
-
-        TensorStorage tensor_storage(name, dummy->type, dummy->ne, ggml_n_dims(dummy), file_index, offset);
-
-        GGML_ASSERT(ggml_nbytes(dummy) == tensor_storage.nbytes());
+        tensor_storage.file_index = file_index;
 
         add_tensor_storage(tensor_storage);
     }
 
-    gguf_free(ctx_gguf_);
-    ggml_free(ctx_meta_);
-
     return true;
 }
 
 /*================================================= SafeTensorsModelLoader ==================================================*/
 
-ggml_type str_to_ggml_type(const std::string& dtype) {
-    ggml_type ttype = GGML_TYPE_COUNT;
-    if (dtype == "F16") {
-        ttype = GGML_TYPE_F16;
-    } else if (dtype == "BF16") {
-        ttype = GGML_TYPE_BF16;
-    } else if (dtype == "F32") {
-        ttype = GGML_TYPE_F32;
-    } else if (dtype == "F64") {
-        ttype = GGML_TYPE_F32;
-    } else if (dtype == "F8_E4M3") {
-        ttype = GGML_TYPE_F16;
-    } else if (dtype == "F8_E5M2") {
-        ttype = GGML_TYPE_F16;
-    } else if (dtype == "I64") {
-        ttype = GGML_TYPE_I32;
-    }
-    return ttype;
-}
-
-// https://huggingface.co/docs/safetensors/index
 bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const std::string& prefix) {
     LOG_DEBUG("init from '%s', prefix = '%s'", file_path.c_str(), prefix.c_str());
-    file_paths_.push_back(file_path);
-    size_t file_index = file_paths_.size() - 1;
-    std::ifstream file(file_path, std::ios::binary);
-    if (!file.is_open()) {
-        LOG_ERROR("failed to open '%s'", file_path.c_str());
-        file_paths_.pop_back();
-        return false;
-    }
 
-    // get file size
-    file.seekg(0, file.end);
-    size_t file_size_ = file.tellg();
-    file.seekg(0, file.beg);
-
-    // read header size
-    if (file_size_ <= ST_HEADER_SIZE_LEN) {
-        LOG_ERROR("invalid safetensor file '%s'", file_path.c_str());
-        file_paths_.pop_back();
-        return false;
-    }
-
-    uint8_t header_size_buf[ST_HEADER_SIZE_LEN];
-    file.read((char*)header_size_buf, ST_HEADER_SIZE_LEN);
-    if (!file) {
-        LOG_ERROR("read safetensors header size failed: '%s'", file_path.c_str());
+    std::vector<TensorStorage> tensor_storages;
+    std::string error;
+    if (!read_safetensors_file(file_path, tensor_storages, &error)) {
+        LOG_ERROR("%s", error.c_str());
         return false;
     }
 
-    size_t header_size_ = read_u64(header_size_buf);
-    if (header_size_ >= file_size_) {
-        LOG_ERROR("invalid safetensor file '%s'", file_path.c_str());
-        file_paths_.pop_back();
-        return false;
-    }
-
-    // read header
-    std::vector<char> header_buf;
-    header_buf.resize(header_size_ + 1);
-    header_buf[header_size_] = '\0';
-    file.read(header_buf.data(), header_size_);
-    if (!file) {
-        LOG_ERROR("read safetensors header failed: '%s'", file_path.c_str());
-        file_paths_.pop_back();
-        return false;
-    }
-
-    nlohmann::json header_;
-    try {
-        header_ = nlohmann::json::parse(header_buf.data());
-    } catch (const std::exception&) {
-        LOG_ERROR("parsing safetensors header failed", file_path.c_str());
-        file_paths_.pop_back();
-        return false;
-    }
-
-    for (auto& item : header_.items()) {
-        std::string name           = item.key();
-        nlohmann::json tensor_info = item.value();
-        // LOG_DEBUG("%s %s\n", name.c_str(), tensor_info.dump().c_str());
-
-        if (name == "__metadata__") {
-            continue;
-        }
-
-        if (is_unused_tensor(name)) {
-            continue;
-        }
-
-        std::string dtype    = tensor_info["dtype"];
-        nlohmann::json shape = tensor_info["shape"];
+    file_paths_.push_back(file_path);
+    size_t file_index = file_paths_.size() - 1;
 
-        if (dtype == "U8") {
+    for (auto& tensor_storage : tensor_storages) {
+        if (is_unused_tensor(tensor_storage.name)) {
             continue;
         }
 
-        size_t begin = tensor_info["data_offsets"][0].get<size_t>();
-        size_t end   = tensor_info["data_offsets"][1].get<size_t>();
-
-        ggml_type type = str_to_ggml_type(dtype);
-        if (type == GGML_TYPE_COUNT) {
-            LOG_ERROR("unsupported dtype '%s' (tensor '%s')", dtype.c_str(), name.c_str());
-            return false;
-        }
-
-        if (shape.size() > SD_MAX_DIMS) {
-            LOG_ERROR("invalid tensor '%s'", name.c_str());
-            return false;
-        }
-
-        int n_dims              = (int)shape.size();
-        int64_t ne[SD_MAX_DIMS] = {1, 1, 1, 1, 1};
-        for (int i = 0; i < n_dims; i++) {
-            ne[i] = shape[i].get<int64_t>();
-        }
-
-        if (n_dims == 5) {
-            n_dims = 4;
-            ne[0]  = ne[0] * ne[1];
-            ne[1]  = ne[2];
-            ne[2]  = ne[3];
-            ne[3]  = ne[4];
-        }
-
-        // ggml_n_dims returns 1 for scalars
-        if (n_dims == 0) {
-            n_dims = 1;
-        }
-
-        if (!starts_with(name, prefix)) {
-            name = prefix + name;
-        }
-
-        TensorStorage tensor_storage(name, type, ne, n_dims, file_index, ST_HEADER_SIZE_LEN + header_size_ + begin);
-        tensor_storage.reverse_ne();
-
-        size_t tensor_data_size = end - begin;
-
-        bool tensor_size_ok;
-        if (dtype == "F8_E4M3") {
-            tensor_storage.is_f8_e4m3 = true;
-            // f8 -> f16
-            tensor_size_ok = (tensor_storage.nbytes() == tensor_data_size * 2);
-        } else if (dtype == "F8_E5M2") {
-            tensor_storage.is_f8_e5m2 = true;
-            // f8 -> f16
-            tensor_size_ok = (tensor_storage.nbytes() == tensor_data_size * 2);
-        } else if (dtype == "F64") {
-            tensor_storage.is_f64 = true;
-            // f64 -> f32
-            tensor_size_ok = (tensor_storage.nbytes() * 2 == tensor_data_size);
-        } else if (dtype == "I64") {
-            tensor_storage.is_i64 = true;
-            // i64 -> i32
-            tensor_size_ok = (tensor_storage.nbytes() * 2 == tensor_data_size);
-        } else {
-            tensor_size_ok = (tensor_storage.nbytes() == tensor_data_size);
-        }
-        if (!tensor_size_ok) {
-            LOG_ERROR("size mismatch for tensor '%s' (%s)\n", name.c_str(), dtype.c_str());
-            return false;
+        if (!starts_with(tensor_storage.name, prefix)) {
+            tensor_storage.name = prefix + tensor_storage.name;
         }
+        tensor_storage.file_index = file_index;
 
         add_tensor_storage(tensor_storage);
 
-        // LOG_DEBUG("%s %s", tensor_storage.to_string().c_str(), dtype.c_str());
+        // LOG_DEBUG("%s", tensor_storage.to_string().c_str());
     }
 
     return true;
@@ -644,362 +357,30 @@ bool ModelLoader::init_from_diffusers_file(const std::string& file_path, const s
 
 /*================================================= CkptModelLoader ==================================================*/
 
-// $ python -m pickletools sd-v1-4/archive/data.pkl | head -n 100
-//     0: \x80 PROTO      2
-//     2: }    EMPTY_DICT
-//     3: q    BINPUT     0
-//     5: (    MARK
-//     6: X        BINUNICODE 'epoch'
-//    16: q        BINPUT     1
-//    18: K        BININT1    6
-//    20: X        BINUNICODE 'global_step'
-//    36: q        BINPUT     2
-//    38: J        BININT     470000
-//    43: X        BINUNICODE 'pytorch-lightning_version'
-//    73: q        BINPUT     3
-//    75: X        BINUNICODE '1.4.2'
-//    85: q        BINPUT     4
-//    87: X        BINUNICODE 'state_dict'
-//   102: q        BINPUT     5
-//   104: }        EMPTY_DICT
-//   105: q        BINPUT     6
-//   107: (        MARK
-//   108: X            BINUNICODE 'betas'
-//   118: q            BINPUT     7
-//   120: c            GLOBAL     'torch._utils _rebuild_tensor_v2'
-//   153: q            BINPUT     8
-//   155: (            MARK
-//   156: (                MARK
-//   157: X                    BINUNICODE 'storage'
-//   169: q                    BINPUT     9
-//   171: c                    GLOBAL     'torch FloatStorage'
-//   191: q                    BINPUT     10
-//   193: X                    BINUNICODE '0'
-//   199: q                    BINPUT     11
-//   201: X                    BINUNICODE 'cpu'
-//   209: q                    BINPUT     12
-//   211: M                    BININT2    1000
-//   214: t                    TUPLE      (MARK at 156)
-//   215: q                BINPUT     13
-//   217: Q                BINPERSID
-//   218: K                BININT1    0
-//   220: M                BININT2    1000
-//  ...............................
-//  3201: q            BINPUT     250
-//  3203: R            REDUCE
-//  3204: q            BINPUT     251
-//  3206: X            BINUNICODE 'model.diffusion_model.input_blocks.1.1.proj_in.weight'
-//  3264: q            BINPUT     252
-//  3266: h            BINGET     8
-//  3268: (            MARK
-//  3269: (                MARK
-//  3270: h                    BINGET     9
-//  3272: h                    BINGET     10
-//  3274: X                    BINUNICODE '30'
-//  3281: q                    BINPUT     253
-//  3283: h                    BINGET     12
-//  3285: J                    BININT     102400
-//  3290: t                    TUPLE      (MARK at 3269)
-//  3291: q                BINPUT     254
-//  3293: Q                BINPERSID
-//  3294: K                BININT1    0
-//  3296: (                MARK
-//  3297: M                    BININT2    320
-//  3300: M                    BININT2    320
-//  3303: K                    BININT1    1
-//  3305: K                    BININT1    1
-//  3307: t                    TUPLE      (MARK at 3296)
-//  3308: q                BINPUT     255
-//  3310: (                MARK
-//  3311: M                    BININT2    320
-//  3314: K                    BININT1    1
-//  3316: K                    BININT1    1
-//  3318: K                    BININT1    1
-//  3320: t                    TUPLE      (MARK at 3310)
-//  3321: r                LONG_BINPUT 256
-//  3326: \x89             NEWFALSE
-//  3327: h                BINGET     16
-//  3329: )                EMPTY_TUPLE
-//  3330: R                REDUCE
-//  3331: r                LONG_BINPUT 257
-//  3336: t                TUPLE      (MARK at 3268)
-//  3337: r            LONG_BINPUT 258
-//  3342: R            REDUCE
-//  3343: r            LONG_BINPUT 259
-//  3348: X            BINUNICODE 'model.diffusion_model.input_blocks.1.1.proj_in.bias'
-//  3404: r            LONG_BINPUT 260
-//  3409: h            BINGET     8
-//  3411: (            MARK
-//  3412: (                MARK
-//  3413: h                    BINGET     9
-//  3415: h                    BINGET     10
-//  3417: X                    BINUNICODE '31'
-
-struct PickleTensorReader {
-    enum ReadPhase {
-        READ_NAME,
-        READ_DATA,
-        CHECK_SIZE,
-        READ_DIMENS
-    };
-    ReadPhase phase   = READ_NAME;
-    size_t entry_size = 0;
-    int32_t nelements = 0;
-
-    TensorStorage tensor_storage;
-
-    static ggml_type global_type;  // all pickle_tensors data type
-    static bool read_global_type;
-
-    bool read_int_value(uint32_t value) {
-        if (phase == CHECK_SIZE) {
-            if (entry_size == value * ggml_type_size(tensor_storage.type)) {
-                nelements = value;
-                phase     = READ_DIMENS;
-                return true;
-            } else {
-                phase = READ_NAME;
-            }
-        } else if (phase == READ_DIMENS) {
-            if (tensor_storage.n_dims + 1 > SD_MAX_DIMS) {  // too many dimens
-                phase                 = READ_NAME;
-                tensor_storage.n_dims = 0;
-            }
-            if (nelements % value == 0) {
-                tensor_storage.ne[tensor_storage.n_dims] = value;
-                tensor_storage.n_dims++;
-            }
-        }
-        return false;
-    }
-
-    void read_global(const std::string& str) {
-        if (str == "FloatStorage") {
-            if (read_global_type) {
-                global_type      = GGML_TYPE_F32;
-                read_global_type = false;
-            }
-            tensor_storage.type = GGML_TYPE_F32;
-        } else if (str == "HalfStorage") {
-            if (read_global_type) {
-                global_type      = GGML_TYPE_F16;
-                read_global_type = false;
-            }
-            tensor_storage.type = GGML_TYPE_F16;
-        }
-    }
-
-    void read_string(const std::string& str, zip_t* zip, std::string dir) {
-        if (str == "storage") {
-            read_global_type = true;
-        } else if (str != "state_dict") {
-            if (phase == READ_DATA) {
-                std::string entry_name = dir + "data/" + std::string(str);
-
-                size_t i, n = zip_entries_total(zip);
-                for (i = 0; i < n; ++i) {
-                    zip_entry_openbyindex(zip, i);
-                    {
-                        std::string name = zip_entry_name(zip);
-                        if (name == entry_name) {
-                            tensor_storage.index_in_zip = (int)i;
-                            entry_size                  = zip_entry_size(zip);
-                            zip_entry_close(zip);
-                            break;
-                        }
-                    }
-                    zip_entry_close(zip);
-                }
-
-                phase = entry_size > 0 ? CHECK_SIZE : READ_NAME;
-            }
-            if (!read_global_type && phase == READ_NAME) {
-                tensor_storage.name = str;
-                phase               = READ_DATA;
-                tensor_storage.type = global_type;
-            }
-        }
-    }
-};
-
-ggml_type PickleTensorReader::global_type = GGML_TYPE_F32;  // all pickle_tensors data type
-bool PickleTensorReader::read_global_type = false;
-
-int find_char(uint8_t* buffer, int len, char c) {
-    for (int pos = 0; pos < len; pos++) {
-        if (buffer[pos] == c) {
-            return pos;
-        }
-    }
-    return -1;
-}
-
-#define MAX_STRING_BUFFER 512
-
-bool ModelLoader::parse_data_pkl(uint8_t* buffer,
-                                 size_t buffer_size,
-                                 zip_t* zip,
-                                 std::string dir,
-                                 size_t file_index,
-                                 const std::string prefix) {
-    uint8_t* buffer_end = buffer + buffer_size;
-    if (buffer[0] == 0x80) {  // proto
-        if (buffer[1] != 2) {
-            LOG_ERROR("Unsupported protocol\n");
-            return false;
-        }
-        buffer += 2;  // 0x80 and version
-        char string_buffer[MAX_STRING_BUFFER];
-        bool finish = false;
-        PickleTensorReader reader;
-        // read pickle binary file
-        while (!finish && buffer < buffer_end) {
-            uint8_t opcode = *buffer;
-            buffer++;
-            // https://github.com/python/cpython/blob/3.7/Lib/pickletools.py#L1048
-            // https://github.com/python/cpython/blob/main/Lib/pickle.py#L105
-            switch (opcode) {
-                case '}':  // EMPTY_DICT     = b'}'   # push empty dict
-                    break;
-                case ']':  // EMPTY_LIST     = b']'   # push empty list
-                    break;
-                // skip unused sections
-                case 'h':  // BINGET         = b'h'   #   "    "    "    "   "   "  ;   "    " 1-byte arg
-                case 'q':  // BINPUT         = b'q'   #   "     "    "   "   " ;   "    " 1-byte arg
-                case 'Q':  // BINPERSID      = b'Q'   #  "       "         "  ;  "  "   "     "  stack
-                    buffer++;
-                    break;
-                case 'r':  // LONG_BINPUT    = b'r'   #   "     "    "   "   " ;   "    " 4-byte arg
-                    buffer += 4;
-                    break;
-                case 0x95:  // FRAME            = b'\x95'  # indicate the beginning of a new frame
-                    buffer += 8;
-                    break;
-                case 0x94:  // MEMOIZE          = b'\x94'  # store top of the stack in memo
-                    break;
-                case '(':  // MARK           = b'('   # push special markobject on stack
-                    break;
-                case 'K':  // BININT1        = b'K'   # push 1-byte unsigned int
-                {
-                    uint8_t value = *buffer;
-                    if (reader.read_int_value(value)) {
-                        buffer++;
-                    }
-                    buffer++;
-                } break;
-                case 'M':  // BININT2        = b'M'   # push 2-byte unsigned int
-                {
-                    uint16_t value = read_short(buffer);
-                    if (reader.read_int_value(value)) {
-                        buffer++;
-                    }
-                    buffer += 2;
-                } break;
-                case 'J':  // BININT         = b'J'   # push four-byte signed int
-                {
-                    const int32_t value = read_int(buffer);
-                    if (reader.read_int_value(value)) {
-                        buffer++;  // skip tuple after read num_elements
-                    }
-                    buffer += 4;
-                } break;
-                case 'X':  // BINUNICODE     = b'X'   #   "     "       "  ; counted UTF-8 string argument
-                {
-                    const int32_t len = read_int(buffer);
-                    buffer += 4;
-                    memset(string_buffer, 0, MAX_STRING_BUFFER);
-                    if (len > MAX_STRING_BUFFER) {
-                        LOG_WARN("tensor name very large");
-                    }
-                    memcpy(string_buffer, buffer, len < MAX_STRING_BUFFER ? len : (MAX_STRING_BUFFER - 1));
-                    buffer += len;
-                    reader.read_string(string_buffer, zip, dir);
-                } break;
-                case 0x8C:  // SHORT_BINUNICODE = b'\x8c'  # push short string; UTF-8 length < 256 bytes
-                {
-                    const int8_t len = *buffer;
-                    buffer++;
-                    memset(string_buffer, 0, MAX_STRING_BUFFER);
-                    memcpy(string_buffer, buffer, len);
-                    buffer += len;
-                    // printf("String: '%s'\n", string_buffer);
-                } break;
-                case 'c':  // GLOBAL         = b'c'   # push self.find_class(modname, name); 2 string args
-                {
-                    int len = find_char(buffer, MAX_STRING_BUFFER, '\n');
-
-                    buffer += len + 1;
-                    len = find_char(buffer, MAX_STRING_BUFFER, '\n');
-
-                    memset(string_buffer, 0, MAX_STRING_BUFFER);
-                    memcpy(string_buffer, buffer, len);
-                    buffer += len + 1;
-                    reader.read_global(string_buffer);
-                } break;
-                case 0x86:  // TUPLE2         = b'\x86'  # build 2-tuple from two topmost stack items
-                case 0x85:  // TUPLE1         = b'\x85'  # build 1-tuple from stack top
-                case 't':   // TUPLE          = b't'   # build tuple from topmost stack items
-                    if (reader.phase == PickleTensorReader::READ_DIMENS) {
-                        reader.tensor_storage.reverse_ne();
-                        reader.tensor_storage.file_index = file_index;
-                        // if(strcmp(prefix.c_str(), "scarlett") == 0)
-                        // printf(" ZIP got tensor %s \n ", reader.tensor_storage.name.c_str());
-                        std::string name = reader.tensor_storage.name;
-                        if (!starts_with(name, prefix)) {
-                            name = prefix + name;
-                        }
-                        reader.tensor_storage.name = name;
-                        add_tensor_storage(reader.tensor_storage);
-
-                        // LOG_DEBUG("%s", reader.tensor_storage.name.c_str());
-                        // reset
-                        reader = PickleTensorReader();
-                    }
-                    break;
-                case '.':  // STOP           = b'.'   # every pickle ends with STOP
-                    finish = true;
-                    break;
-                default:
-                    break;
-            }
-        }
-    }
-    return true;
-}
-
 bool ModelLoader::init_from_ckpt_file(const std::string& file_path, const std::string& prefix) {
     LOG_DEBUG("init from '%s'", file_path.c_str());
-    file_paths_.push_back(file_path);
-    size_t file_index = file_paths_.size() - 1;
 
-    zip_t* zip = zip_open(file_path.c_str(), 0, 'r');
-    if (zip == nullptr) {
-        LOG_ERROR("failed to open '%s'", file_path.c_str());
+    std::vector<TensorStorage> tensor_storages;
+    std::string error;
+    if (!read_ckpt_file(file_path, tensor_storages, &error)) {
+        LOG_ERROR("%s", error.c_str());
         return false;
     }
-    int n = (int)zip_entries_total(zip);
-    for (int i = 0; i < n; ++i) {
-        zip_entry_openbyindex(zip, i);
-        {
-            std::string name = zip_entry_name(zip);
-            size_t pos       = name.find("data.pkl");
-            if (pos != std::string::npos) {
-                std::string dir = name.substr(0, pos);
-                printf("ZIP %d, name = %s, dir = %s \n", i, name.c_str(), dir.c_str());
-                void* pkl_data = nullptr;
-                size_t pkl_size;
-                zip_entry_read(zip, &pkl_data, &pkl_size);
-
-                // LOG_DEBUG("%lld", pkl_size);
 
-                parse_data_pkl((uint8_t*)pkl_data, pkl_size, zip, dir, file_index, prefix);
+    file_paths_.push_back(file_path);
+    size_t file_index = file_paths_.size() - 1;
 
-                free(pkl_data);
-            }
+    for (auto& tensor_storage : tensor_storages) {
+        if (!starts_with(tensor_storage.name, prefix)) {
+            tensor_storage.name = prefix + tensor_storage.name;
         }
-        zip_entry_close(zip);
+        tensor_storage.file_index = file_index;
+
+        add_tensor_storage(tensor_storage);
+
+        // LOG_DEBUG("%s", tensor_storage.to_string().c_str());
     }
-    zip_close(zip);
+
     return true;
 }
 
@@ -1703,19 +1084,8 @@ bool ModelLoader::tensor_should_be_converted(const TensorStorage& tensor_storage
 }
 
 bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type, const std::string& tensor_type_rules_str) {
-    auto backend    = ggml_backend_cpu_init();
-    size_t mem_size = 1 * 1024 * 1024;  // for padding
-    mem_size += tensor_storage_map.size() * ggml_tensor_overhead();
-    mem_size += get_params_mem_size(backend, type);
-    LOG_INFO("model tensors mem size: %.2fMB", mem_size / 1024.f / 1024.f);
-    ggml_context* ggml_ctx = ggml_init({mem_size, nullptr, false});
-
-    gguf_context* gguf_ctx = gguf_init_empty();
-
     auto tensor_type_rules = parse_tensor_type_rules(tensor_type_rules_str);
-
-    std::mutex tensor_mutex;
-    auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
+    auto get_tensor_type   = [&](const TensorStorage& tensor_storage) -> ggml_type {
         const std::string& name = tensor_storage.name;
         ggml_type tensor_type   = tensor_storage.type;
         ggml_type dst_type      = type;
@@ -1732,6 +1102,28 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type
             tensor_type = dst_type;
         }
 
+        return tensor_type;
+    };
+
+    auto backend    = ggml_backend_cpu_init();
+    size_t mem_size = 1 * 1024 * 1024;  // for padding
+    mem_size += tensor_storage_map.size() * ggml_tensor_overhead();
+    mem_size += get_params_mem_size(backend, type);
+    LOG_INFO("model tensors mem size: %.2fMB", mem_size / 1024.f / 1024.f);
+    ggml_context* ggml_ctx = ggml_init({mem_size, nullptr, false});
+
+    if (ggml_ctx == nullptr) {
+        LOG_ERROR("ggml_init failed for GGUF writer");
+        ggml_backend_free(backend);
+        return false;
+    }
+
+    std::vector<ggml_tensor*> tensors;
+    std::mutex tensor_mutex;
+    auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
+        const std::string& name = tensor_storage.name;
+        ggml_type tensor_type   = get_tensor_type(tensor_storage);
+
         std::lock_guard<std::mutex> lock(tensor_mutex);
         ggml_tensor* tensor = ggml_new_tensor(ggml_ctx, tensor_type, tensor_storage.n_dims, tensor_storage.ne);
         if (tensor == nullptr) {
@@ -1754,8 +1146,7 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type
         }
 
         *dst_tensor = tensor;
-
-        gguf_add_tensor(gguf_ctx, tensor);
+        tensors.push_back(tensor);
 
         return true;
     };
@@ -1763,12 +1154,17 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type
     bool success = load_tensors(on_new_tensor_cb);
     ggml_backend_free(backend);
     LOG_INFO("load tensors done");
-    LOG_INFO("trying to save tensors to %s", file_path.c_str());
+
+    std::string error;
     if (success) {
-        gguf_write_to_file(gguf_ctx, file_path.c_str(), false);
+        success = write_gguf_file(file_path, tensors, &error);
     }
+
+    if (!success && !error.empty()) {
+        LOG_ERROR("%s", error.c_str());
+    }
+
     ggml_free(ggml_ctx);
-    gguf_free(gguf_ctx);
     return success;
 }
 
diff --git a/src/model.h b/src/model.h
index e481a0b9f..de15431f4 100644
--- a/src/model.h
+++ b/src/model.h
@@ -5,20 +5,13 @@
 #include <map>
 #include <memory>
 #include <set>
-#include <sstream>
 #include <string>
-#include <tuple>
-#include <utility>
 #include <vector>
 
 #include "ggml-backend.h"
 #include "ggml.h"
-#include "gguf.h"
-#include "json.hpp"
+#include "model_io/tensor_storage.h"
 #include "ordered_map.hpp"
-#include "zip.h"
-
-#define SD_MAX_DIMS 5
 
 enum SDVersion {
     VERSION_SD1,
@@ -195,115 +188,6 @@ enum PMVersion {
     PM_VERSION_2,
 };
 
-struct TensorStorage {
-    std::string name;
-    ggml_type type          = GGML_TYPE_F32;
-    ggml_type expected_type = GGML_TYPE_COUNT;
-    bool is_f8_e4m3         = false;
-    bool is_f8_e5m2         = false;
-    bool is_f64             = false;
-    bool is_i64             = false;
-    int64_t ne[SD_MAX_DIMS] = {1, 1, 1, 1, 1};
-    int n_dims              = 0;
-
-    size_t file_index = 0;
-    int index_in_zip  = -1;  // >= means stored in a zip file
-    uint64_t offset   = 0;   // offset in file
-
-    TensorStorage() = default;
-
-    TensorStorage(std::string name, ggml_type type, const int64_t* ne, int n_dims, size_t file_index, size_t offset = 0)
-        : name(std::move(name)), type(type), n_dims(n_dims), file_index(file_index), offset(offset) {
-        for (int i = 0; i < n_dims; i++) {
-            this->ne[i] = ne[i];
-        }
-    }
-
-    int64_t nelements() const {
-        int64_t n = 1;
-        for (int i = 0; i < SD_MAX_DIMS; i++) {
-            n *= ne[i];
-        }
-        return n;
-    }
-
-    int64_t nbytes() const {
-        return nelements() * ggml_type_size(type) / ggml_blck_size(type);
-    }
-
-    int64_t nbytes_to_read() const {
-        if (is_f8_e4m3 || is_f8_e5m2) {
-            return nbytes() / 2;
-        } else if (is_f64 || is_i64) {
-            return nbytes() * 2;
-        } else {
-            return nbytes();
-        }
-    }
-
-    void unsqueeze() {
-        if (n_dims == 2) {
-            n_dims = 4;
-            ne[3]  = ne[1];
-            ne[2]  = ne[0];
-            ne[1]  = 1;
-            ne[0]  = 1;
-        }
-    }
-
-    std::vector<TensorStorage> chunk(size_t n) {
-        std::vector<TensorStorage> chunks;
-        uint64_t chunk_size = nbytes_to_read() / n;
-        // printf("%d/%d\n", chunk_size, nbytes_to_read());
-        reverse_ne();
-        for (size_t i = 0; i < n; i++) {
-            TensorStorage chunk_i = *this;
-            chunk_i.ne[0]         = ne[0] / n;
-            chunk_i.offset        = offset + i * chunk_size;
-            chunk_i.reverse_ne();
-            chunks.push_back(chunk_i);
-        }
-        reverse_ne();
-        return chunks;
-    }
-
-    void reverse_ne() {
-        int64_t new_ne[SD_MAX_DIMS] = {1, 1, 1, 1, 1};
-        for (int i = 0; i < n_dims; i++) {
-            new_ne[i] = ne[n_dims - 1 - i];
-        }
-        for (int i = 0; i < n_dims; i++) {
-            ne[i] = new_ne[i];
-        }
-    }
-
-    std::string to_string() const {
-        std::stringstream ss;
-        const char* type_name = ggml_type_name(type);
-        if (is_f8_e4m3) {
-            type_name = "f8_e4m3";
-        } else if (is_f8_e5m2) {
-            type_name = "f8_e5m2";
-        } else if (is_f64) {
-            type_name = "f64";
-        } else if (is_i64) {
-            type_name = "i64";
-        }
-        ss << name << " | " << type_name << " | ";
-        ss << n_dims << " [";
-        for (int i = 0; i < SD_MAX_DIMS; i++) {
-            ss << ne[i];
-            if (i != SD_MAX_DIMS - 1) {
-                ss << ", ";
-            }
-        }
-        ss << "]";
-        return ss.str();
-    }
-};
-
-typedef std::function<bool(const TensorStorage&, ggml_tensor**)> on_new_tensor_cb_t;
-
 typedef OrderedMap<std::string, TensorStorage> String2TensorStorage;
 
 class ModelLoader {
@@ -314,13 +198,6 @@ class ModelLoader {
 
     void add_tensor_storage(const TensorStorage& tensor_storage);
 
-    bool parse_data_pkl(uint8_t* buffer,
-                        size_t buffer_size,
-                        zip_t* zip,
-                        std::string dir,
-                        size_t file_index,
-                        const std::string prefix);
-
     bool init_from_gguf_file(const std::string& file_path, const std::string& prefix = "");
     bool init_from_safetensors_file(const std::string& file_path, const std::string& prefix = "");
     bool init_from_ckpt_file(const std::string& file_path, const std::string& prefix = "");
diff --git a/src/model_io/ckpt_io.cpp b/src/model_io/ckpt_io.cpp
new file mode 100644
index 000000000..63fd262d5
--- /dev/null
+++ b/src/model_io/ckpt_io.cpp
@@ -0,0 +1,403 @@
+#include "ckpt_io.h"
+
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <vector>
+
+#include "zip.h"
+
+static constexpr int MAX_STRING_BUFFER = 512;
+
+static void set_error(std::string* error, const std::string& message) {
+    if (error != nullptr) {
+        *error = message;
+    }
+}
+
+static int32_t read_int(const uint8_t* buffer) {
+    // little endian
+    uint32_t value = 0;
+    value |= static_cast<uint32_t>(buffer[3]) << 24;
+    value |= static_cast<uint32_t>(buffer[2]) << 16;
+    value |= static_cast<uint32_t>(buffer[1]) << 8;
+    value |= static_cast<uint32_t>(buffer[0]);
+    return static_cast<int32_t>(value);
+}
+
+static uint16_t read_short(const uint8_t* buffer) {
+    // little endian
+    uint16_t value = 0;
+    value |= static_cast<uint16_t>(buffer[1]) << 8;
+    value |= static_cast<uint16_t>(buffer[0]);
+    return value;
+}
+
+bool is_ckpt_file(const std::string& file_path) {
+    zip_t* zip = zip_open(file_path.c_str(), 0, 'r');
+    if (zip == nullptr) {
+        return false;
+    }
+    zip_close(zip);
+    return true;
+}
+
+/*================================================= CkptModelLoader ==================================================*/
+
+// $ python -m pickletools sd-v1-4/archive/data.pkl | head -n 100
+//     0: \x80 PROTO      2
+//     2: }    EMPTY_DICT
+//     3: q    BINPUT     0
+//     5: (    MARK
+//     6: X        BINUNICODE 'epoch'
+//    16: q        BINPUT     1
+//    18: K        BININT1    6
+//    20: X        BINUNICODE 'global_step'
+//    36: q        BINPUT     2
+//    38: J        BININT     470000
+//    43: X        BINUNICODE 'pytorch-lightning_version'
+//    73: q        BINPUT     3
+//    75: X        BINUNICODE '1.4.2'
+//    85: q        BINPUT     4
+//    87: X        BINUNICODE 'state_dict'
+//   102: q        BINPUT     5
+//   104: }        EMPTY_DICT
+//   105: q        BINPUT     6
+//   107: (        MARK
+//   108: X            BINUNICODE 'betas'
+//   118: q            BINPUT     7
+//   120: c            GLOBAL     'torch._utils _rebuild_tensor_v2'
+//   153: q            BINPUT     8
+//   155: (            MARK
+//   156: (                MARK
+//   157: X                    BINUNICODE 'storage'
+//   169: q                    BINPUT     9
+//   171: c                    GLOBAL     'torch FloatStorage'
+//   191: q                    BINPUT     10
+//   193: X                    BINUNICODE '0'
+//   199: q                    BINPUT     11
+//   201: X                    BINUNICODE 'cpu'
+//   209: q                    BINPUT     12
+//   211: M                    BININT2    1000
+//   214: t                    TUPLE      (MARK at 156)
+//   215: q                BINPUT     13
+//   217: Q                BINPERSID
+//   218: K                BININT1    0
+//   220: M                BININT2    1000
+//  ...............................
+//  3201: q            BINPUT     250
+//  3203: R            REDUCE
+//  3204: q            BINPUT     251
+//  3206: X            BINUNICODE 'model.diffusion_model.input_blocks.1.1.proj_in.weight'
+//  3264: q            BINPUT     252
+//  3266: h            BINGET     8
+//  3268: (            MARK
+//  3269: (                MARK
+//  3270: h                    BINGET     9
+//  3272: h                    BINGET     10
+//  3274: X                    BINUNICODE '30'
+//  3281: q                    BINPUT     253
+//  3283: h                    BINGET     12
+//  3285: J                    BININT     102400
+//  3290: t                    TUPLE      (MARK at 3269)
+//  3291: q                BINPUT     254
+//  3293: Q                BINPERSID
+//  3294: K                BININT1    0
+//  3296: (                MARK
+//  3297: M                    BININT2    320
+//  3300: M                    BININT2    320
+//  3303: K                    BININT1    1
+//  3305: K                    BININT1    1
+//  3307: t                    TUPLE      (MARK at 3296)
+//  3308: q                BINPUT     255
+//  3310: (                MARK
+//  3311: M                    BININT2    320
+//  3314: K                    BININT1    1
+//  3316: K                    BININT1    1
+//  3318: K                    BININT1    1
+//  3320: t                    TUPLE      (MARK at 3310)
+//  3321: r                LONG_BINPUT 256
+//  3326: \x89             NEWFALSE
+//  3327: h                BINGET     16
+//  3329: )                EMPTY_TUPLE
+//  3330: R                REDUCE
+//  3331: r                LONG_BINPUT 257
+//  3336: t                TUPLE      (MARK at 3268)
+//  3337: r            LONG_BINPUT 258
+//  3342: R            REDUCE
+//  3343: r            LONG_BINPUT 259
+//  3348: X            BINUNICODE 'model.diffusion_model.input_blocks.1.1.proj_in.bias'
+//  3404: r            LONG_BINPUT 260
+//  3409: h            BINGET     8
+//  3411: (            MARK
+//  3412: (                MARK
+//  3413: h                    BINGET     9
+//  3415: h                    BINGET     10
+//  3417: X                    BINUNICODE '31'
+
+struct PickleTensorReader {
+    enum ReadPhase {
+        READ_NAME,
+        READ_DATA,
+        CHECK_SIZE,
+        READ_DIMENS
+    };
+    ReadPhase phase   = READ_NAME;
+    size_t entry_size = 0;
+    int32_t nelements = 0;
+
+    TensorStorage tensor_storage;
+
+    static ggml_type global_type;  // all pickle_tensors data type
+    static bool read_global_type;
+
+    bool read_int_value(uint32_t value) {
+        if (phase == CHECK_SIZE) {
+            if (entry_size == value * ggml_type_size(tensor_storage.type)) {
+                nelements = value;
+                phase     = READ_DIMENS;
+                return true;
+            } else {
+                phase = READ_NAME;
+            }
+        } else if (phase == READ_DIMENS) {
+            if (tensor_storage.n_dims + 1 > SD_MAX_DIMS) {  // too many dimens
+                phase                 = READ_NAME;
+                tensor_storage.n_dims = 0;
+            }
+            if (nelements % value == 0) {
+                tensor_storage.ne[tensor_storage.n_dims] = value;
+                tensor_storage.n_dims++;
+            }
+        }
+        return false;
+    }
+
+    void read_global(const std::string& str) {
+        if (str == "FloatStorage") {
+            if (read_global_type) {
+                global_type      = GGML_TYPE_F32;
+                read_global_type = false;
+            }
+            tensor_storage.type = GGML_TYPE_F32;
+        } else if (str == "HalfStorage") {
+            if (read_global_type) {
+                global_type      = GGML_TYPE_F16;
+                read_global_type = false;
+            }
+            tensor_storage.type = GGML_TYPE_F16;
+        }
+    }
+
+    void read_string(const std::string& str, zip_t* zip, std::string dir) {
+        if (str == "storage") {
+            read_global_type = true;
+        } else if (str != "state_dict") {
+            if (phase == READ_DATA) {
+                std::string entry_name = dir + "data/" + std::string(str);
+
+                size_t i, n = zip_entries_total(zip);
+                for (i = 0; i < n; ++i) {
+                    zip_entry_openbyindex(zip, i);
+                    {
+                        std::string name = zip_entry_name(zip);
+                        if (name == entry_name) {
+                            tensor_storage.index_in_zip = (int)i;
+                            entry_size                  = zip_entry_size(zip);
+                            zip_entry_close(zip);
+                            break;
+                        }
+                    }
+                    zip_entry_close(zip);
+                }
+
+                phase = entry_size > 0 ? CHECK_SIZE : READ_NAME;
+            }
+            if (!read_global_type && phase == READ_NAME) {
+                tensor_storage.name = str;
+                phase               = READ_DATA;
+                tensor_storage.type = global_type;
+            }
+        }
+    }
+};
+
+ggml_type PickleTensorReader::global_type = GGML_TYPE_F32;  // all pickle_tensors data type
+bool PickleTensorReader::read_global_type = false;
+
+static int find_char(uint8_t* buffer, int len, char c) {
+    for (int pos = 0; pos < len; pos++) {
+        if (buffer[pos] == c) {
+            return pos;
+        }
+    }
+    return -1;
+}
+
+static bool parse_data_pkl(uint8_t* buffer,
+                           size_t buffer_size,
+                           zip_t* zip,
+                           std::string dir,
+                           std::vector<TensorStorage>& tensor_storages,
+                           std::string* error) {
+    uint8_t* buffer_end = buffer + buffer_size;
+    if (buffer[0] == 0x80) {  // proto
+        if (buffer[1] != 2) {
+            set_error(error, "unsupported pickle protocol");
+            return false;
+        }
+        buffer += 2;  // 0x80 and version
+        char string_buffer[MAX_STRING_BUFFER];
+        bool finish = false;
+        PickleTensorReader reader;
+        // read pickle binary file
+        while (!finish && buffer < buffer_end) {
+            uint8_t opcode = *buffer;
+            buffer++;
+            // https://github.com/python/cpython/blob/3.7/Lib/pickletools.py#L1048
+            // https://github.com/python/cpython/blob/main/Lib/pickle.py#L105
+            switch (opcode) {
+                case '}':  // EMPTY_DICT     = b'}'   # push empty dict
+                    break;
+                case ']':  // EMPTY_LIST     = b']'   # push empty list
+                    break;
+                // skip unused sections
+                case 'h':  // BINGET         = b'h'   #   "    "    "    "   "   "  ;   "    " 1-byte arg
+                case 'q':  // BINPUT         = b'q'   #   "     "    "   "   " ;   "    " 1-byte arg
+                case 'Q':  // BINPERSID      = b'Q'   #  "       "         "  ;  "  "   "     "  stack
+                    buffer++;
+                    break;
+                case 'r':  // LONG_BINPUT    = b'r'   #   "     "    "   "   " ;   "    " 4-byte arg
+                    buffer += 4;
+                    break;
+                case 0x95:  // FRAME            = b'\x95'  # indicate the beginning of a new frame
+                    buffer += 8;
+                    break;
+                case 0x94:  // MEMOIZE          = b'\x94'  # store top of the stack in memo
+                    break;
+                case '(':  // MARK           = b'('   # push special markobject on stack
+                    break;
+                case 'K':  // BININT1        = b'K'   # push 1-byte unsigned int
+                {
+                    uint8_t value = *buffer;
+                    if (reader.read_int_value(value)) {
+                        buffer++;
+                    }
+                    buffer++;
+                } break;
+                case 'M':  // BININT2        = b'M'   # push 2-byte unsigned int
+                {
+                    uint16_t value = read_short(buffer);
+                    if (reader.read_int_value(value)) {
+                        buffer++;
+                    }
+                    buffer += 2;
+                } break;
+                case 'J':  // BININT         = b'J'   # push four-byte signed int
+                {
+                    const int32_t value = read_int(buffer);
+                    if (reader.read_int_value(value)) {
+                        buffer++;  // skip tuple after read num_elements
+                    }
+                    buffer += 4;
+                } break;
+                case 'X':  // BINUNICODE     = b'X'   #   "     "       "  ; counted UTF-8 string argument
+                {
+                    const int32_t len = read_int(buffer);
+                    buffer += 4;
+                    memset(string_buffer, 0, MAX_STRING_BUFFER);
+                    if (len > MAX_STRING_BUFFER) {
+                        // keep truncated names null-terminated, matching the old parser behavior
+                    }
+                    memcpy(string_buffer, buffer, len < MAX_STRING_BUFFER ? len : (MAX_STRING_BUFFER - 1));
+                    buffer += len;
+                    reader.read_string(string_buffer, zip, dir);
+                } break;
+                case 0x8C:  // SHORT_BINUNICODE = b'\x8c'  # push short string; UTF-8 length < 256 bytes
+                {
+                    const int8_t len = *buffer;
+                    buffer++;
+                    memset(string_buffer, 0, MAX_STRING_BUFFER);
+                    memcpy(string_buffer, buffer, len);
+                    buffer += len;
+                    // printf("String: '%s'\n", string_buffer);
+                } break;
+                case 'c':  // GLOBAL         = b'c'   # push self.find_class(modname, name); 2 string args
+                {
+                    int len = find_char(buffer, MAX_STRING_BUFFER, '\n');
+
+                    buffer += len + 1;
+                    len = find_char(buffer, MAX_STRING_BUFFER, '\n');
+
+                    memset(string_buffer, 0, MAX_STRING_BUFFER);
+                    memcpy(string_buffer, buffer, len);
+                    buffer += len + 1;
+                    reader.read_global(string_buffer);
+                } break;
+                case 0x86:  // TUPLE2         = b'\x86'  # build 2-tuple from two topmost stack items
+                case 0x85:  // TUPLE1         = b'\x85'  # build 1-tuple from stack top
+                case 't':   // TUPLE          = b't'   # build tuple from topmost stack items
+                    if (reader.phase == PickleTensorReader::READ_DIMENS) {
+                        reader.tensor_storage.reverse_ne();
+                        tensor_storages.push_back(reader.tensor_storage);
+
+                        // LOG_DEBUG("%s", reader.tensor_storage.name.c_str());
+                        // reset
+                        reader = PickleTensorReader();
+                    }
+                    break;
+                case '.':  // STOP           = b'.'   # every pickle ends with STOP
+                    finish = true;
+                    break;
+                default:
+                    break;
+            }
+        }
+    }
+    return true;
+}
+
+bool read_ckpt_file(const std::string& file_path,
+                    std::vector<TensorStorage>& tensor_storages,
+                    std::string* error) {
+    zip_t* zip = zip_open(file_path.c_str(), 0, 'r');
+    if (zip == nullptr) {
+        set_error(error, "failed to open '" + file_path + "'");
+        return false;
+    }
+
+    tensor_storages.clear();
+    bool success = true;
+    int n        = (int)zip_entries_total(zip);
+    for (int i = 0; i < n; ++i) {
+        zip_entry_openbyindex(zip, i);
+        {
+            std::string name = zip_entry_name(zip);
+            size_t pos       = name.find("data.pkl");
+            if (pos != std::string::npos) {
+                std::string dir = name.substr(0, pos);
+                printf("ZIP %d, name = %s, dir = %s \n", i, name.c_str(), dir.c_str());
+                void* pkl_data = nullptr;
+                size_t pkl_size;
+                zip_entry_read(zip, &pkl_data, &pkl_size);
+
+                // LOG_DEBUG("%lld", pkl_size);
+
+                if (!parse_data_pkl((uint8_t*)pkl_data, pkl_size, zip, dir, tensor_storages, error)) {
+                    success = false;
+                }
+
+                free(pkl_data);
+            }
+        }
+        zip_entry_close(zip);
+
+        if (!success) {
+            break;
+        }
+    }
+    zip_close(zip);
+    return success;
+}
diff --git a/src/model_io/ckpt_io.h b/src/model_io/ckpt_io.h
new file mode 100644
index 000000000..72667ce22
--- /dev/null
+++ b/src/model_io/ckpt_io.h
@@ -0,0 +1,14 @@
+#ifndef __SD_MODEL_IO_CKPT_IO_H__
+#define __SD_MODEL_IO_CKPT_IO_H__
+
+#include <string>
+#include <vector>
+
+#include "tensor_storage.h"
+
+bool is_ckpt_file(const std::string& file_path);
+bool read_ckpt_file(const std::string& file_path,
+                    std::vector<TensorStorage>& tensor_storages,
+                    std::string* error = nullptr);
+
+#endif  // __SD_MODEL_IO_CKPT_IO_H__
diff --git a/src/model_io/gguf_io.cpp b/src/model_io/gguf_io.cpp
new file mode 100644
index 000000000..36aea4a1d
--- /dev/null
+++ b/src/model_io/gguf_io.cpp
@@ -0,0 +1,122 @@
+#include "gguf_io.h"
+
+#include <cstdint>
+#include <fstream>
+#include <string>
+#include <vector>
+
+#include "gguf.h"
+#include "gguf_reader_ext.h"
+#include "util.h"
+
+static void set_error(std::string* error, const std::string& message) {
+    if (error != nullptr) {
+        *error = message;
+    }
+}
+
+bool is_gguf_file(const std::string& file_path) {
+    std::ifstream file(file_path, std::ios::binary);
+    if (!file.is_open()) {
+        return false;
+    }
+
+    char magic[4];
+
+    file.read(magic, sizeof(magic));
+    if (!file) {
+        return false;
+    }
+    for (uint32_t i = 0; i < sizeof(magic); i++) {
+        if (magic[i] != GGUF_MAGIC[i]) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+bool read_gguf_file(const std::string& file_path,
+                    std::vector<TensorStorage>& tensor_storages,
+                    std::string* error) {
+    tensor_storages.clear();
+
+    gguf_context* ctx_gguf_ = nullptr;
+    ggml_context* ctx_meta_ = nullptr;
+
+    ctx_gguf_ = gguf_init_from_file(file_path.c_str(), {true, &ctx_meta_});
+    if (!ctx_gguf_) {
+        GGUFReader gguf_reader;
+        if (!gguf_reader.load(file_path)) {
+            set_error(error, "failed to open '" + file_path + "' with GGUFReader");
+            return false;
+        }
+
+        size_t data_offset = gguf_reader.data_offset();
+        for (const auto& gguf_tensor_info : gguf_reader.tensors()) {
+            TensorStorage tensor_storage(
+                gguf_tensor_info.name,
+                gguf_tensor_info.type,
+                gguf_tensor_info.shape.data(),
+                static_cast<int>(gguf_tensor_info.shape.size()),
+                0,
+                data_offset + gguf_tensor_info.offset);
+
+            tensor_storages.push_back(tensor_storage);
+        }
+
+        return true;
+    }
+
+    int n_tensors = static_cast<int>(gguf_get_n_tensors(ctx_gguf_));
+
+    size_t data_offset = gguf_get_data_offset(ctx_gguf_);
+    for (int i = 0; i < n_tensors; i++) {
+        std::string name   = gguf_get_tensor_name(ctx_gguf_, i);
+        ggml_tensor* dummy = ggml_get_tensor(ctx_meta_, name.c_str());
+        size_t offset      = data_offset + gguf_get_tensor_offset(ctx_gguf_, i);
+
+        TensorStorage tensor_storage(name, dummy->type, dummy->ne, ggml_n_dims(dummy), 0, offset);
+
+        if (ggml_nbytes(dummy) != tensor_storage.nbytes()) {
+            gguf_free(ctx_gguf_);
+            ggml_free(ctx_meta_);
+            set_error(error, "size mismatch for tensor '" + name + "'");
+            return false;
+        }
+
+        tensor_storages.push_back(tensor_storage);
+    }
+
+    gguf_free(ctx_gguf_);
+    ggml_free(ctx_meta_);
+
+    return true;
+}
+
+bool write_gguf_file(const std::string& file_path,
+                     const std::vector<ggml_tensor*>& tensors,
+                     std::string* error) {
+    gguf_context* gguf_ctx = gguf_init_empty();
+    if (gguf_ctx == nullptr) {
+        set_error(error, "gguf_init_empty failed");
+        return false;
+    }
+
+    for (ggml_tensor* tensor : tensors) {
+        if (tensor == nullptr) {
+            set_error(error, "null tensor cannot be written to GGUF");
+            gguf_free(gguf_ctx);
+            return false;
+        }
+        gguf_add_tensor(gguf_ctx, tensor);
+    }
+
+    LOG_INFO("trying to save tensors to %s", file_path.c_str());
+    bool success = gguf_write_to_file(gguf_ctx, file_path.c_str(), false);
+    if (!success) {
+        set_error(error, "failed to write GGUF file '" + file_path + "'");
+    }
+    gguf_free(gguf_ctx);
+    return success;
+}
diff --git a/src/model_io/gguf_io.h b/src/model_io/gguf_io.h
new file mode 100644
index 000000000..912e879f0
--- /dev/null
+++ b/src/model_io/gguf_io.h
@@ -0,0 +1,17 @@
+#ifndef __SD_MODEL_IO_GGUF_IO_H__
+#define __SD_MODEL_IO_GGUF_IO_H__
+
+#include <string>
+#include <vector>
+
+#include "tensor_storage.h"
+
+bool is_gguf_file(const std::string& file_path);
+bool read_gguf_file(const std::string& file_path,
+                    std::vector<TensorStorage>& tensor_storages,
+                    std::string* error = nullptr);
+bool write_gguf_file(const std::string& file_path,
+                     const std::vector<ggml_tensor*>& tensors,
+                     std::string* error = nullptr);
+
+#endif  // __SD_MODEL_IO_GGUF_IO_H__
diff --git a/src/gguf_reader.hpp b/src/model_io/gguf_reader_ext.h
similarity index 98%
rename from src/gguf_reader.hpp
rename to src/model_io/gguf_reader_ext.h
index 9a2ceebcf..95f0027fc 100644
--- a/src/gguf_reader.hpp
+++ b/src/model_io/gguf_reader_ext.h
@@ -1,5 +1,5 @@
-#ifndef __GGUF_READER_HPP__
-#define __GGUF_READER_HPP__
+#ifndef __SD_MODEL_IO_GGUF_READER_EXT_H__
+#define __SD_MODEL_IO_GGUF_READER_EXT_H__
 
 #include <cstdint>
 #include <fstream>
@@ -231,4 +231,4 @@ class GGUFReader {
     size_t data_offset() const { return data_offset_; }
 };
 
-#endif  // __GGUF_READER_HPP__
+#endif  // __SD_MODEL_IO_GGUF_READER_EXT_H__
diff --git a/src/model_io/safetensors_io.cpp b/src/model_io/safetensors_io.cpp
new file mode 100644
index 000000000..1ae485214
--- /dev/null
+++ b/src/model_io/safetensors_io.cpp
@@ -0,0 +1,236 @@
+#include "safetensors_io.h"
+
+#include <cstdint>
+#include <exception>
+#include <fstream>
+#include <string>
+#include <vector>
+
+#include "json.hpp"
+
+static constexpr size_t ST_HEADER_SIZE_LEN = 8;
+
+static void set_error(std::string* error, const std::string& message) {
+    if (error != nullptr) {
+        *error = message;
+    }
+}
+
+static uint64_t read_u64(const uint8_t* buffer) {
+    // little endian
+    uint64_t value = 0;
+    value |= static_cast<uint64_t>(buffer[7]) << 56;
+    value |= static_cast<uint64_t>(buffer[6]) << 48;
+    value |= static_cast<uint64_t>(buffer[5]) << 40;
+    value |= static_cast<uint64_t>(buffer[4]) << 32;
+    value |= static_cast<uint64_t>(buffer[3]) << 24;
+    value |= static_cast<uint64_t>(buffer[2]) << 16;
+    value |= static_cast<uint64_t>(buffer[1]) << 8;
+    value |= static_cast<uint64_t>(buffer[0]);
+    return value;
+}
+
+bool is_safetensors_file(const std::string& file_path) {
+    std::ifstream file(file_path, std::ios::binary);
+    if (!file.is_open()) {
+        return false;
+    }
+
+    // get file size
+    file.seekg(0, file.end);
+    size_t file_size_ = file.tellg();
+    file.seekg(0, file.beg);
+
+    // read header size
+    if (file_size_ <= ST_HEADER_SIZE_LEN) {
+        return false;
+    }
+
+    uint8_t header_size_buf[ST_HEADER_SIZE_LEN];
+    file.read((char*)header_size_buf, ST_HEADER_SIZE_LEN);
+    if (!file) {
+        return false;
+    }
+
+    size_t header_size_ = read_u64(header_size_buf);
+    if (header_size_ >= file_size_ || header_size_ <= 2) {
+        return false;
+    }
+
+    // read header
+    std::vector<char> header_buf;
+    header_buf.resize(header_size_ + 1);
+    header_buf[header_size_] = '\0';
+    file.read(header_buf.data(), header_size_);
+    if (!file) {
+        return false;
+    }
+    try {
+        nlohmann::json header_ = nlohmann::json::parse(header_buf.data());
+    } catch (const std::exception&) {
+        return false;
+    }
+    return true;
+}
+
+static ggml_type str_to_ggml_type(const std::string& dtype) {
+    ggml_type ttype = GGML_TYPE_COUNT;
+    if (dtype == "F16") {
+        ttype = GGML_TYPE_F16;
+    } else if (dtype == "BF16") {
+        ttype = GGML_TYPE_BF16;
+    } else if (dtype == "F32") {
+        ttype = GGML_TYPE_F32;
+    } else if (dtype == "F64") {
+        ttype = GGML_TYPE_F32;
+    } else if (dtype == "F8_E4M3") {
+        ttype = GGML_TYPE_F16;
+    } else if (dtype == "F8_E5M2") {
+        ttype = GGML_TYPE_F16;
+    } else if (dtype == "I32") {
+        ttype = GGML_TYPE_I32;
+    } else if (dtype == "I64") {
+        ttype = GGML_TYPE_I32;
+    }
+    return ttype;
+}
+
+// https://huggingface.co/docs/safetensors/index
+bool read_safetensors_file(const std::string& file_path,
+                           std::vector<TensorStorage>& tensor_storages,
+                           std::string* error) {
+    std::ifstream file(file_path, std::ios::binary);
+    if (!file.is_open()) {
+        set_error(error, "failed to open '" + file_path + "'");
+        return false;
+    }
+
+    // get file size
+    file.seekg(0, file.end);
+    size_t file_size_ = file.tellg();
+    file.seekg(0, file.beg);
+
+    // read header size
+    if (file_size_ <= ST_HEADER_SIZE_LEN) {
+        set_error(error, "invalid safetensor file '" + file_path + "'");
+        return false;
+    }
+
+    uint8_t header_size_buf[ST_HEADER_SIZE_LEN];
+    file.read((char*)header_size_buf, ST_HEADER_SIZE_LEN);
+    if (!file) {
+        set_error(error, "read safetensors header size failed: '" + file_path + "'");
+        return false;
+    }
+
+    size_t header_size_ = read_u64(header_size_buf);
+    if (header_size_ >= file_size_) {
+        set_error(error, "invalid safetensor file '" + file_path + "'");
+        return false;
+    }
+
+    // read header
+    std::vector<char> header_buf;
+    header_buf.resize(header_size_ + 1);
+    header_buf[header_size_] = '\0';
+    file.read(header_buf.data(), header_size_);
+    if (!file) {
+        set_error(error, "read safetensors header failed: '" + file_path + "'");
+        return false;
+    }
+
+    nlohmann::json header_;
+    try {
+        header_ = nlohmann::json::parse(header_buf.data());
+    } catch (const std::exception&) {
+        set_error(error, "parsing safetensors header failed: '" + file_path + "'");
+        return false;
+    }
+
+    tensor_storages.clear();
+    for (auto& item : header_.items()) {
+        std::string name           = item.key();
+        nlohmann::json tensor_info = item.value();
+        // LOG_DEBUG("%s %s\n", name.c_str(), tensor_info.dump().c_str());
+
+        if (name == "__metadata__") {
+            continue;
+        }
+
+        std::string dtype    = tensor_info["dtype"];
+        nlohmann::json shape = tensor_info["shape"];
+
+        if (dtype == "U8") {
+            continue;
+        }
+
+        size_t begin = tensor_info["data_offsets"][0].get<size_t>();
+        size_t end   = tensor_info["data_offsets"][1].get<size_t>();
+
+        ggml_type type = str_to_ggml_type(dtype);
+        if (type == GGML_TYPE_COUNT) {
+            set_error(error, "unsupported dtype '" + dtype + "' (tensor '" + name + "')");
+            return false;
+        }
+
+        if (shape.size() > SD_MAX_DIMS) {
+            set_error(error, "invalid tensor '" + name + "'");
+            return false;
+        }
+
+        int n_dims              = (int)shape.size();
+        int64_t ne[SD_MAX_DIMS] = {1, 1, 1, 1, 1};
+        for (int i = 0; i < n_dims; i++) {
+            ne[i] = shape[i].get<int64_t>();
+        }
+
+        if (n_dims == 5) {
+            n_dims = 4;
+            ne[0]  = ne[0] * ne[1];
+            ne[1]  = ne[2];
+            ne[2]  = ne[3];
+            ne[3]  = ne[4];
+        }
+
+        // ggml_n_dims returns 1 for scalars
+        if (n_dims == 0) {
+            n_dims = 1;
+        }
+
+        TensorStorage tensor_storage(name, type, ne, n_dims, 0, ST_HEADER_SIZE_LEN + header_size_ + begin);
+        tensor_storage.reverse_ne();
+
+        size_t tensor_data_size = end - begin;
+
+        bool tensor_size_ok;
+        if (dtype == "F8_E4M3") {
+            tensor_storage.is_f8_e4m3 = true;
+            // f8 -> f16
+            tensor_size_ok = (tensor_storage.nbytes() == tensor_data_size * 2);
+        } else if (dtype == "F8_E5M2") {
+            tensor_storage.is_f8_e5m2 = true;
+            // f8 -> f16
+            tensor_size_ok = (tensor_storage.nbytes() == tensor_data_size * 2);
+        } else if (dtype == "F64") {
+            tensor_storage.is_f64 = true;
+            // f64 -> f32
+            tensor_size_ok = (tensor_storage.nbytes() * 2 == tensor_data_size);
+        } else if (dtype == "I64") {
+            tensor_storage.is_i64 = true;
+            // i64 -> i32
+            tensor_size_ok = (tensor_storage.nbytes() * 2 == tensor_data_size);
+        } else {
+            tensor_size_ok = (tensor_storage.nbytes() == tensor_data_size);
+        }
+        if (!tensor_size_ok) {
+            set_error(error, "size mismatch for tensor '" + name + "' (" + dtype + ")");
+            return false;
+        }
+
+        tensor_storages.push_back(tensor_storage);
+
+        // LOG_DEBUG("%s %s", tensor_storage.to_string().c_str(), dtype.c_str());
+    }
+
+    return true;
+}
diff --git a/src/model_io/safetensors_io.h b/src/model_io/safetensors_io.h
new file mode 100644
index 000000000..a746e8ac4
--- /dev/null
+++ b/src/model_io/safetensors_io.h
@@ -0,0 +1,14 @@
+#ifndef __SD_MODEL_IO_SAFETENSORS_IO_H__
+#define __SD_MODEL_IO_SAFETENSORS_IO_H__
+
+#include <string>
+#include <vector>
+
+#include "tensor_storage.h"
+
+bool is_safetensors_file(const std::string& file_path);
+bool read_safetensors_file(const std::string& file_path,
+                           std::vector<TensorStorage>& tensor_storages,
+                           std::string* error = nullptr);
+
+#endif  // __SD_MODEL_IO_SAFETENSORS_IO_H__
diff --git a/src/model_io/tensor_storage.h b/src/model_io/tensor_storage.h
new file mode 100644
index 000000000..20b58a19d
--- /dev/null
+++ b/src/model_io/tensor_storage.h
@@ -0,0 +1,125 @@
+#ifndef __SD_TENSOR_STORAGE_H__
+#define __SD_TENSOR_STORAGE_H__
+
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "ggml.h"
+
+#define SD_MAX_DIMS 5
+
+struct TensorStorage {
+    std::string name;
+    ggml_type type          = GGML_TYPE_F32;
+    ggml_type expected_type = GGML_TYPE_COUNT;
+    bool is_f8_e4m3         = false;
+    bool is_f8_e5m2         = false;
+    bool is_f64             = false;
+    bool is_i64             = false;
+    int64_t ne[SD_MAX_DIMS] = {1, 1, 1, 1, 1};
+    int n_dims              = 0;
+
+    size_t file_index = 0;
+    int index_in_zip  = -1;  // >= means stored in a zip file
+    uint64_t offset   = 0;   // offset in file
+
+    TensorStorage() = default;
+
+    TensorStorage(std::string name, ggml_type type, const int64_t* ne, int n_dims, size_t file_index, size_t offset = 0)
+        : name(std::move(name)), type(type), n_dims(n_dims), file_index(file_index), offset(offset) {
+        for (int i = 0; i < n_dims; i++) {
+            this->ne[i] = ne[i];
+        }
+    }
+
+    int64_t nelements() const {
+        int64_t n = 1;
+        for (int i = 0; i < SD_MAX_DIMS; i++) {
+            n *= ne[i];
+        }
+        return n;
+    }
+
+    int64_t nbytes() const {
+        return nelements() * ggml_type_size(type) / ggml_blck_size(type);
+    }
+
+    int64_t nbytes_to_read() const {
+        if (is_f8_e4m3 || is_f8_e5m2) {
+            return nbytes() / 2;
+        } else if (is_f64 || is_i64) {
+            return nbytes() * 2;
+        } else {
+            return nbytes();
+        }
+    }
+
+    void unsqueeze() {
+        if (n_dims == 2) {
+            n_dims = 4;
+            ne[3]  = ne[1];
+            ne[2]  = ne[0];
+            ne[1]  = 1;
+            ne[0]  = 1;
+        }
+    }
+
+    std::vector<TensorStorage> chunk(size_t n) {
+        std::vector<TensorStorage> chunks;
+        uint64_t chunk_size = nbytes_to_read() / n;
+        // printf("%d/%d\n", chunk_size, nbytes_to_read());
+        reverse_ne();
+        for (size_t i = 0; i < n; i++) {
+            TensorStorage chunk_i = *this;
+            chunk_i.ne[0]         = ne[0] / n;
+            chunk_i.offset        = offset + i * chunk_size;
+            chunk_i.reverse_ne();
+            chunks.push_back(chunk_i);
+        }
+        reverse_ne();
+        return chunks;
+    }
+
+    void reverse_ne() {
+        int64_t new_ne[SD_MAX_DIMS] = {1, 1, 1, 1, 1};
+        for (int i = 0; i < n_dims; i++) {
+            new_ne[i] = ne[n_dims - 1 - i];
+        }
+        for (int i = 0; i < n_dims; i++) {
+            ne[i] = new_ne[i];
+        }
+    }
+
+    std::string to_string() const {
+        std::stringstream ss;
+        const char* type_name = ggml_type_name(type);
+        if (is_f8_e4m3) {
+            type_name = "f8_e4m3";
+        } else if (is_f8_e5m2) {
+            type_name = "f8_e5m2";
+        } else if (is_f64) {
+            type_name = "f64";
+        } else if (is_i64) {
+            type_name = "i64";
+        }
+        ss << name << " | " << type_name << " | ";
+        ss << n_dims << " [";
+        for (int i = 0; i < SD_MAX_DIMS; i++) {
+            ss << ne[i];
+            if (i != SD_MAX_DIMS - 1) {
+                ss << ", ";
+            }
+        }
+        ss << "]";
+        return ss.str();
+    }
+};
+
+typedef std::function<bool(const TensorStorage&, ggml_tensor**)> on_new_tensor_cb_t;
+
+#endif  // __SD_TENSOR_STORAGE_H__