Skip to content

Commit f3bdded

Browse files
committed
Merge branch 'master' into xsn/server_response_generator_refactor
2 parents efd73cf + ee8dd5c commit f3bdded

File tree

24 files changed

+3214
-1426
lines changed

24 files changed

+3214
-1426
lines changed

.github/workflows/build.yml

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1651,3 +1651,50 @@ jobs:
16511651
run: |
16521652
GG_BUILD_KLEIDIAI=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
16531653
1654+
ggml-ci-arm64-graviton4-kleidiai:
1655+
runs-on: ah-ubuntu_22_04-c8g_8x
1656+
1657+
steps:
1658+
- name: Clone
1659+
id: checkout
1660+
uses: actions/checkout@v4
1661+
1662+
- name: Dependencies
1663+
id: depends
1664+
run: |
1665+
set -euxo pipefail
1666+
sudo apt-get update
1667+
sudo DEBIAN_FRONTEND=noninteractive NEEDRESTART_MODE=a \
1668+
apt-get install -y \
1669+
build-essential \
1670+
libcurl4-openssl-dev \
1671+
python3-venv \
1672+
gpg \
1673+
wget \
1674+
time \
1675+
git-lfs
1676+
1677+
git lfs install
1678+
1679+
# install the latest cmake
1680+
sudo install -d /usr/share/keyrings
1681+
wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc \
1682+
| gpg --dearmor \
1683+
| sudo tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null
1684+
echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ jammy main' \
1685+
| sudo tee /etc/apt/sources.list.d/kitware.list
1686+
sudo apt-get update
1687+
sudo apt-get install -y cmake
1688+
1689+
- name: ccache
1690+
uses: ggml-org/ccache-action@v1.2.16
1691+
with:
1692+
key: ggml-ci-arm64-graviton4-kleidiai
1693+
evict-old-files: 1d
1694+
1695+
- name: Test
1696+
id: ggml-ci
1697+
run: |
1698+
GG_BUILD_KLEIDIAI=1 \
1699+
GG_BUILD_EXTRA_TESTS_0=1 \
1700+
bash ./ci/run.sh ./tmp/results ./tmp/mnt

CMakeLists.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ option(LLAMA_TOOLS_INSTALL "llama: install tools" ${LLAMA_TOOLS_INSTALL_
9292

9393
# 3rd party libs
9494
option(LLAMA_CURL "llama: use libcurl to download model from an URL" ON)
95+
option(LLAMA_HTTPLIB "llama: if libcurl is disabled, use httplib to download model from an URL" ON)
9596
option(LLAMA_OPENSSL "llama: use openssl to support HTTPS" OFF)
9697
option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)
9798

@@ -200,7 +201,9 @@ endif()
200201

201202
if (LLAMA_BUILD_COMMON)
202203
add_subdirectory(common)
203-
add_subdirectory(vendor/cpp-httplib)
204+
if (LLAMA_HTTPLIB)
205+
add_subdirectory(vendor/cpp-httplib)
206+
endif()
204207
endif()
205208

206209
if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)

build-xcframework.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -454,6 +454,8 @@ cmake -B build-visionos -G Xcode \
454454
-DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \
455455
-DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \
456456
-DLLAMA_CURL=OFF \
457+
-DLLAMA_HTTPLIB=OFF \
458+
-DLLAMA_BUILD_SERVER=OFF \
457459
-S .
458460
cmake --build build-visionos --config Release -- -quiet
459461

@@ -468,6 +470,8 @@ cmake -B build-visionos-sim -G Xcode \
468470
-DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \
469471
-DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \
470472
-DLLAMA_CURL=OFF \
473+
-DLLAMA_HTTPLIB=OFF \
474+
-DLLAMA_BUILD_SERVER=OFF \
471475
-S .
472476
cmake --build build-visionos-sim --config Release -- -quiet
473477

ci/run.sh

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,12 @@ fi
121121
if [ -n "${GG_BUILD_KLEIDIAI}" ]; then
122122
echo ">>===== Enabling KleidiAI support"
123123

124-
CANDIDATES=("armv9-a+dotprod+i8mm" "armv8.6-a+dotprod+i8mm" "armv8.2-a+dotprod")
124+
CANDIDATES=(
125+
"armv9-a+dotprod+i8mm+sve2"
126+
"armv9-a+dotprod+i8mm"
127+
"armv8.6-a+dotprod+i8mm"
128+
"armv8.2-a+dotprod"
129+
)
125130
CPU=""
126131

127132
for cpu in "${CANDIDATES[@]}"; do

common/CMakeLists.txt

Lines changed: 2 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -91,47 +91,12 @@ if (LLAMA_CURL)
9191
target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL)
9292
include_directories(${CURL_INCLUDE_DIRS})
9393
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARIES})
94-
else()
94+
elseif (LLAMA_HTTPLIB)
9595
# otherwise, use cpp-httplib
96+
target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_HTTPLIB)
9697
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} cpp-httplib)
9798
endif()
9899

99-
if (LLAMA_OPENSSL)
100-
find_package(OpenSSL)
101-
if (OpenSSL_FOUND)
102-
include(CheckCSourceCompiles)
103-
set(SAVED_CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES})
104-
set(CMAKE_REQUIRED_INCLUDES ${OPENSSL_INCLUDE_DIR})
105-
check_c_source_compiles("
106-
#include <openssl/opensslv.h>
107-
#if defined(OPENSSL_IS_BORINGSSL) || defined(LIBRESSL_VERSION_NUMBER)
108-
# if OPENSSL_VERSION_NUMBER < 0x1010107f
109-
# error bad version
110-
# endif
111-
#else
112-
# if OPENSSL_VERSION_NUMBER < 0x30000000L
113-
# error bad version
114-
# endif
115-
#endif
116-
int main() { return 0; }
117-
" OPENSSL_VERSION_SUPPORTED)
118-
set(CMAKE_REQUIRED_INCLUDES ${SAVED_CMAKE_REQUIRED_INCLUDES})
119-
if (OPENSSL_VERSION_SUPPORTED)
120-
message(STATUS "OpenSSL found: ${OPENSSL_VERSION}")
121-
target_compile_definitions(${TARGET} PUBLIC CPPHTTPLIB_OPENSSL_SUPPORT)
122-
target_link_libraries(${TARGET} PUBLIC OpenSSL::SSL OpenSSL::Crypto)
123-
if (APPLE AND CMAKE_SYSTEM_NAME STREQUAL "Darwin")
124-
target_compile_definitions(${TARGET} PUBLIC CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN)
125-
find_library(CORE_FOUNDATION_FRAMEWORK CoreFoundation REQUIRED)
126-
find_library(SECURITY_FRAMEWORK Security REQUIRED)
127-
target_link_libraries(${TARGET} PUBLIC ${CORE_FOUNDATION_FRAMEWORK} ${SECURITY_FRAMEWORK})
128-
endif()
129-
endif()
130-
else()
131-
message(STATUS "OpenSSL not found, SSL support disabled")
132-
endif()
133-
endif()
134-
135100
if (LLAMA_LLGUIDANCE)
136101
include(ExternalProject)
137102
set(LLGUIDANCE_SRC ${CMAKE_BINARY_DIR}/llguidance/source)

common/download.cpp

Lines changed: 47 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
#if defined(LLAMA_USE_CURL)
2121
#include <curl/curl.h>
2222
#include <curl/easy.h>
23-
#else
23+
#elif defined(LLAMA_USE_HTTPLIB)
2424
#include "http.h"
2525
#endif
2626

@@ -467,7 +467,7 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
467467
return { res_code, std::move(res_buffer) };
468468
}
469469

470-
#else
470+
#elif defined(LLAMA_USE_HTTPLIB)
471471

472472
static bool is_output_a_tty() {
473473
#if defined(_WIN32)
@@ -713,6 +713,8 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string
713713

714714
#endif // LLAMA_USE_CURL
715715

716+
#if defined(LLAMA_USE_CURL) || defined(LLAMA_USE_HTTPLIB)
717+
716718
static bool common_download_file_single(const std::string & url,
717719
const std::string & path,
718720
const std::string & bearer_token,
@@ -907,33 +909,6 @@ common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, cons
907909
return { hf_repo, ggufFile, mmprojFile };
908910
}
909911

910-
std::vector<common_cached_model_info> common_list_cached_models() {
911-
std::vector<common_cached_model_info> models;
912-
const std::string cache_dir = fs_get_cache_directory();
913-
const std::vector<common_file_info> files = fs_list_files(cache_dir);
914-
for (const auto & file : files) {
915-
if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
916-
common_cached_model_info model_info;
917-
model_info.manifest_path = file.path;
918-
std::string fname = file.name;
919-
string_replace_all(fname, ".json", ""); // remove extension
920-
auto parts = string_split<std::string>(fname, '=');
921-
if (parts.size() == 4) {
922-
// expect format: manifest=<user>=<model>=<tag>=<other>
923-
model_info.user = parts[1];
924-
model_info.model = parts[2];
925-
model_info.tag = parts[3];
926-
} else {
927-
// invalid format
928-
continue;
929-
}
930-
model_info.size = 0; // TODO: get GGUF size, not manifest size
931-
models.push_back(model_info);
932-
}
933-
}
934-
return models;
935-
}
936-
937912
//
938913
// Docker registry functions
939914
//
@@ -1052,3 +1027,46 @@ std::string common_docker_resolve_model(const std::string & docker) {
10521027
throw;
10531028
}
10541029
}
1030+
1031+
#else
1032+
1033+
common_hf_file_res common_get_hf_file(const std::string &, const std::string &, bool) {
1034+
throw std::runtime_error("download functionality is not enabled in this build");
1035+
}
1036+
1037+
bool common_download_model(const common_params_model &, const std::string &, bool) {
1038+
throw std::runtime_error("download functionality is not enabled in this build");
1039+
}
1040+
1041+
std::string common_docker_resolve_model(const std::string &) {
1042+
throw std::runtime_error("download functionality is not enabled in this build");
1043+
}
1044+
1045+
#endif // LLAMA_USE_CURL || LLAMA_USE_HTTPLIB
1046+
1047+
std::vector<common_cached_model_info> common_list_cached_models() {
1048+
std::vector<common_cached_model_info> models;
1049+
const std::string cache_dir = fs_get_cache_directory();
1050+
const std::vector<common_file_info> files = fs_list_files(cache_dir);
1051+
for (const auto & file : files) {
1052+
if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
1053+
common_cached_model_info model_info;
1054+
model_info.manifest_path = file.path;
1055+
std::string fname = file.name;
1056+
string_replace_all(fname, ".json", ""); // remove extension
1057+
auto parts = string_split<std::string>(fname, '=');
1058+
if (parts.size() == 4) {
1059+
// expect format: manifest=<user>=<model>=<tag>=<other>
1060+
model_info.user = parts[1];
1061+
model_info.model = parts[2];
1062+
model_info.tag = parts[3];
1063+
} else {
1064+
// invalid format
1065+
continue;
1066+
}
1067+
model_info.size = 0; // TODO: get GGUF size, not manifest size
1068+
models.push_back(model_info);
1069+
}
1070+
}
1071+
return models;
1072+
}

docs/backend/CANN.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,12 @@ Converting the matmul weight format from ND to NZ to improve performance. Enable
313313

314314
### GGML_CANN_ACL_GRAPH
315315

316-
Operators are executed using ACL graph execution, rather than in op-by-op (eager) mode. Enabled by default.
316+
Operators are executed using ACL graph execution, rather than in op-by-op (eager) mode. Enabled by default, but it only takes effect if the build was configured with `USE_ACL_GRAPH=ON`. To build with ACL graph support, recompile using:
317+
318+
```sh
319+
cmake -B build -DGGML_CANN=on -DCMAKE_BUILD_TYPE=release -DUSE_ACL_GRAPH=ON
320+
cmake --build build --config release
321+
```
317322

318323
### GGML_CANN_GRAPH_CACHE_CAPACITY
319324

ggml/src/ggml-cann/aclnn_ops.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,35 @@ void ggml_cann_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
448448
ggml_cann_release_resources(ctx, norm, acl_src, acl_dst);
449449
}
450450

451+
// Row-wise L2 normalization on the CANN backend: computes the 2-norm of
// dst->src[0] reduced over ne[0] (keeping that dimension with size 1) and
// divides the source by it, writing the result into dst.
//
// ctx: CANN backend context providing the memory pool and used to launch the ops.
// dst: destination tensor; the input is taken from dst->src[0].
void ggml_cann_l2_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
    ggml_tensor * src = dst->src[0];

    aclTensor * acl_src = ggml_cann_create_tensor(src);
    aclTensor * acl_dst = ggml_cann_create_tensor(dst);

    // The scratch tensor that receives the per-row norms is always declared as
    // ACL_FLOAT, so its allocation size, element size and strides must all be
    // derived from sizeof(float). Using the source element size here would only
    // be correct when src happens to be F32.
    const size_t  norm_elem_size = sizeof(float);
    const int64_t n_bytes = src->ne[3] * src->ne[2] * src->ne[1] * static_cast<int64_t>(norm_elem_size);
    ggml_cann_pool_alloc temp_buffer_allocator(ctx.pool(), n_bytes);
    void * buffer = temp_buffer_allocator.get();

    // Same shape as src except that ne[0] collapses to 1; strides are contiguous.
    int64_t div_ne[] = {1, src->ne[1], src->ne[2], src->ne[3]};
    size_t  div_nb[GGML_MAX_DIMS];
    div_nb[0] = norm_elem_size;
    for (int i = 1; i < GGML_MAX_DIMS; ++i) {
        div_nb[i] = div_nb[i - 1] * div_ne[i - 1];
    }
    aclTensor * acl_div = ggml_cann_create_tensor(buffer, ACL_FLOAT, norm_elem_size, div_ne, div_nb, GGML_MAX_DIMS);

    // Reduction axis as seen by ACL.
    // NOTE(review): presumably ACL dim 3 maps to ggml ne[0] — confirm against ggml_cann_create_tensor.
    std::vector<int64_t> norm_dims = { 3 };
    aclIntArray * dims_array = aclCreateIntArray(norm_dims.data(), norm_dims.size());

    // p = 2 selects the Euclidean norm; `true` keeps the reduced dimension so
    // the result matches the div_ne shape.
    float p_value = 2.0f;
    aclScalar * p_scalar = aclCreateScalar(&p_value, aclDataType::ACL_FLOAT);
    GGML_CANN_CALL_ACLNN_OP(ctx, Norm, acl_src, p_scalar, dims_array, true, acl_div);

    // NOTE(review): no epsilon is applied to the norm before dividing, so an
    // all-zero row divides by zero — confirm whether the op's eps parameter
    // should clamp the norm here.
    GGML_CANN_CALL_ACLNN_OP(ctx, Div, acl_src, acl_div, acl_dst);
    ggml_cann_release_resources(ctx, dims_array, p_scalar, acl_src, acl_dst, acl_div);
}
479+
451480
void ggml_cann_group_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
452481
ggml_tensor * src = dst->src[0];
453482

ggml/src/ggml-cann/aclnn_ops.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
#include <aclnnop/aclnn_cos.h>
4747
#include <aclnnop/aclnn_log.h>
4848
#include <aclnnop/aclnn_sign.h>
49+
#include <aclnnop/aclnn_norm.h>
4950
#include "acl_tensor.h"
5051
#include "common.h"
5152

@@ -187,6 +188,29 @@ void ggml_cann_argsort(ggml_backend_cann_context & ctx, ggml_tensor * dst);
187188
*/
188189
void ggml_cann_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);
189190

191+
/**
192+
* @brief Computes the L2 Normalization for a ggml tensor using the CANN
193+
* backend.
194+
*
195+
* @details This function applies the L2 Normalization operation on the
196+
* input tensor `src` and stores the result in the destination tensor
197+
* `dst`. L2 Normalization scales the input tensor such that the
198+
* L2 norm along the specified dimension equals 1. This operation
199+
* is commonly used in neural networks for feature normalization
200+
* and vector scaling.
201+
* The operation is defined as:
202+
* \f[
203+
* \text{out} = \frac{x}{\sqrt{\sum{x^2}}}
204+
* \f]
205+
* The normalization is performed along the innermost dimension (`ne[0]`), i.e. independently for each row.
206+
*
207+
* @param ctx The CANN context used for operations.
208+
* @param dst The destination tensor where the normalized values will be stored.
209+
* @attention The reduction axis is fixed to `ne[0]` of the input tensor; it is
210+
* not configurable.
211+
*/
212+
void ggml_cann_l2_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);
213+
190214
/**
191215
* @brief Computes the Group Normalization for a ggml tensor using the CANN
192216
* backend.

ggml/src/ggml-cann/ggml-cann.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1777,6 +1777,9 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context & ctx, struct gg
17771777
case GGML_OP_GROUP_NORM:
17781778
ggml_cann_group_norm(ctx, dst);
17791779
break;
1780+
case GGML_OP_L2_NORM:
1781+
ggml_cann_l2_norm(ctx, dst);
1782+
break;
17801783
case GGML_OP_CONCAT:
17811784
ggml_cann_concat(ctx, dst);
17821785
break;
@@ -2515,6 +2518,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, const ggml_ten
25152518
// value of paddingW should be at most half of kernelW
25162519
return (p0 <= (k0 / 2)) && (p1 <= (k1 / 2));
25172520
}
2521+
case GGML_OP_L2_NORM:
25182522
case GGML_OP_DUP:
25192523
case GGML_OP_SUM:
25202524
case GGML_OP_IM2COL:

0 commit comments

Comments
 (0)