diff --git a/CHANGELOG.md b/CHANGELOG.md
index d2e4937c9..03240454f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+- feat: Update llama.cpp to ggerganov/llama.cpp@c0159f9c1f874da15e94f371d136f5920b4b5335 by @abetlen in #2161
 - fix: Handle embedding models without KV memory and test embeddings with a real GGUF embedding model by @abetlen in #2160
 - fix(ci): Shrink CUDA wheel fatbins so CUDA releases stay under GitHub's asset size limit by @abetlen in #2158
 
diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
index e51492c56..5a6c06b07 100644
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -1314,6 +1314,22 @@ def llama_model_load_from_splits(
     ...
 
 
+# // Load a model from an open FILE pointer
+# LLAMA_API struct llama_model * llama_model_load_from_file_ptr(
+#                                FILE * file,
+#           struct llama_model_params   params);
+@ctypes_function(
+    "llama_model_load_from_file_ptr",
+    [ctypes.c_void_p, llama_model_params],  # FILE * bound as an opaque pointer
+    llama_model_p_ctypes,
+)
+def llama_model_load_from_file_ptr(
+    file: ctypes.c_void_p, params: llama_model_params, /
+) -> Optional[llama_model_p]:
+    """Load a model from an open C ``FILE *`` handle passed as a void pointer."""
+    ...
+
+
 # LLAMA_API void llama_model_save_to_file(
 #         const struct llama_model * model,
 #                     const char * path_model);
diff --git a/vendor/llama.cpp b/vendor/llama.cpp
index 49bfddeca..c0159f9c1 160000
--- a/vendor/llama.cpp
+++ b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit 49bfddeca18e62fa3d39114a23e9fcbdf8a22388
+Subproject commit c0159f9c1f874da15e94f371d136f5920b4b5335