Skip to content

Commit 6e88404

Browse files
authored
Merge branch 'ggml-org:master' into mradermacher
2 parents b15d331 + 0d88315 commit 6e88404

File tree

6 files changed

+31
-31
lines changed

6 files changed

+31
-31
lines changed

ggml/src/ggml-backend.cpp

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1071,6 +1071,11 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
10711071
}
10721072
}
10731073
}
1074+
// if the node is still unassigned, assign it to the first backend that supports it
1075+
for (int b = 0; b < sched->n_backends && *cur_backend_id == -1; b++) {
1076+
ggml_backend_sched_set_if_supported(sched, node, b, cur_backend_id);
1077+
}
1078+
GGML_ASSERT(*cur_backend_id != -1);
10741079
}
10751080

10761081
// pass 5: split graph, find tensors that need to be copied
@@ -1098,7 +1103,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
10981103

10991104
const int node_backend_id = tensor_backend_id(node);
11001105

1101-
assert(node_backend_id != -1); // all nodes should be assigned by now, this can happen if there is no CPU fallback
1106+
GGML_ASSERT(node_backend_id != -1); // all nodes should be assigned by now, this can happen if there is no CPU fallback
11021107

11031108
// check if we should start a new split based on the sources of the current node
11041109
bool need_new_split = false;
@@ -1156,7 +1161,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
11561161

11571162
size_t src_id = hash_id(src);
11581163
const int src_backend_id = sched->hv_tensor_backend_ids[src_id];
1159-
assert(src_backend_id != -1); // all inputs should be assigned by now
1164+
GGML_ASSERT(src_backend_id != -1); // all inputs should be assigned by now
11601165

11611166
if (src->flags & GGML_TENSOR_FLAG_INPUT && sched->n_copies > 1) {
11621167
if (tensor_id_copy(src_id, src_backend_id, 0) == NULL) {

ggml/src/ggml-cpu/ggml-cpu.cpp

Lines changed: 17 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@
3535

3636
// ggml-backend interface
3737

38-
std::vector<ggml_backend_buffer_type_t>& ggml_backend_cpu_get_extra_buffers_type() {
38+
std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffer_types() {
3939
static std::vector<ggml_backend_buffer_type_t> bufts = []() {
4040
std::vector<ggml_backend_buffer_type_t> bufts;
4141

@@ -57,23 +57,27 @@ std::vector<ggml_backend_buffer_type_t>& ggml_backend_cpu_get_extra_buffers_type
5757
}
5858
#endif
5959

60-
bufts.push_back(NULL);
61-
6260
return bufts;
6361
}();
6462

6563
return bufts;
6664
}
6765

6866
static ggml_backend_buffer_type_t * ggml_backend_cpu_device_get_extra_buffers_type(ggml_backend_dev_t device) {
69-
return ggml_backend_cpu_get_extra_buffers_type().data();
67+
static std::vector<ggml_backend_buffer_type_t> extra_bufts = [] {
68+
std::vector<ggml_backend_buffer_type_t> bufts = ggml_backend_cpu_get_extra_buffer_types();
69+
bufts.push_back(nullptr);
70+
return bufts;
71+
}();
72+
73+
return extra_bufts.data();
7074

7175
GGML_UNUSED(device);
7276
}
7377

7478
static bool ggml_backend_cpu_is_extra_buffer_type(ggml_backend_buffer_type_t buft) {
75-
for (auto * extra : ggml_backend_cpu_get_extra_buffers_type()) {
76-
if (extra && extra == buft) {
79+
for (auto * extra : ggml_backend_cpu_get_extra_buffer_types()) {
80+
if (extra == buft) {
7781
return true;
7882
}
7983
}
@@ -397,20 +401,13 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st
397401
return true;
398402
}
399403

400-
// extra_buffer_op?
401-
for (auto extra : ggml_backend_cpu_get_extra_buffers_type()) {
402-
if (extra) {
403-
auto buf_extra = (ggml::cpu::extra_buffer_type*) extra->context;
404-
if (buf_extra && buf_extra->supports_op(dev, op)) {
405-
return true;
406-
}
407-
}
408-
}
409-
410-
// the other case need host buffer.
411-
for (int i = 0; i < GGML_MAX_SRC; i++) {
412-
if (op->src[i] && op->src[i]->buffer && !ggml_backend_buft_is_host(op->src[i]->buffer->buft)) {
413-
return false;
404+
// check extra buffer types
405+
// note: only the first sources are checked for extra buffer types to reduce overhead, increase if necessary
406+
for (int i = 0; i < 4; i++) {
407+
if (op->src[i] && op->src[i]->buffer &&
408+
ggml_backend_cpu_is_extra_buffer_type(op->src[i]->buffer->buft)) {
409+
auto * buf_extra = (ggml::cpu::extra_buffer_type *) op->src[i]->buffer->buft->context;
410+
return buf_extra->supports_op(dev, op);
414411
}
415412
}
416413

ggml/src/ggml-cpu/traits.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ extra_buffer_type::~extra_buffer_type() {}
1010
} // namespace ggml::cpu
1111

1212
bool ggml_cpu_extra_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) {
13-
for (auto extra : ggml_backend_cpu_get_extra_buffers_type()) {
13+
for (auto extra : ggml_backend_cpu_get_extra_buffer_types()) {
1414
if (extra && extra->context) {
1515
auto buf_extra = (ggml::cpu::extra_buffer_type *) extra->context;
1616
auto tensor_traits = buf_extra->get_tensor_traits(op);
@@ -23,7 +23,7 @@ bool ggml_cpu_extra_compute_forward(struct ggml_compute_params * params, struct
2323
}
2424

2525
bool ggml_cpu_extra_work_size(int n_threads, const struct ggml_tensor * op, size_t * size) {
26-
for (auto extra : ggml_backend_cpu_get_extra_buffers_type()) {
26+
for (auto extra : ggml_backend_cpu_get_extra_buffer_types()) {
2727
if (extra && extra->context) {
2828
auto buf_extra = (ggml::cpu::extra_buffer_type *) extra->context;
2929
auto tensor_traits = buf_extra->get_tensor_traits(op);

ggml/src/ggml-cpu/traits.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,6 @@ class extra_buffer_type {
3333
} // namespace ggml::cpu
3434

3535
// implemented in ggml-cpu.cpp.
36-
std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffers_type();
36+
std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffer_types();
3737

3838
#endif

src/llama-chat.cpp

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -193,11 +193,11 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
193193
return LLM_CHAT_TEMPLATE_LLAMA4;
194194
} else if (tmpl_contains("<|endofuserprompt|>")) {
195195
return LLM_CHAT_TEMPLATE_DOTS1;
196-
} else if (tmpl_contains("<|startoftext|>") && tmpl_contains("<|extra_4|>")) {
196+
} else if (tmpl_contains("<|extra_0|>") && tmpl_contains("<|extra_4|>")) {
197197
return LLM_CHAT_TEMPLATE_HUNYUAN_MOE;
198198
} else if (tmpl_contains("<|start|>") && tmpl_contains("<|channel|>")) {
199199
return LLM_CHAT_TEMPLATE_OPENAI_MOE;
200-
} else if (tmpl_contains("<|hy_place▁holder▁no▁2|>") && tmpl_contains("<|hy_place▁holder▁no▁3|>")) {
200+
} else if (tmpl_contains("<|hy_Assistant|>") && tmpl_contains("<|hy_place▁holder▁no▁3|>")) {
201201
return LLM_CHAT_TEMPLATE_HUNYUAN_DENSE;
202202
} else if (tmpl_contains("<|im_assistant|>assistant<|im_middle|>")) {
203203
return LLM_CHAT_TEMPLATE_KIMI_K2;
@@ -625,8 +625,6 @@ int32_t llm_chat_apply_template(
625625
} else if (tmpl == LLM_CHAT_TEMPLATE_YANDEX) {
626626
// Yandex template ("\n\n" is defined as EOT token)
627627

628-
ss << "<s>";
629-
630628
for (size_t i = 0; i < chat.size(); i++) {
631629
std::string role(chat[i]->role);
632630
if (role == "user") {

tests/test-chat-template.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -277,9 +277,9 @@ int main(void) {
277277
{
278278
/* .name= */ "yandex/YandexGPT-5-Lite-8B-instruct",
279279
/* .template_str= */ "<s>{%- set names = {'assistant': ' Ассистент:', 'user': ' Пользователь:'} %}\n{%- set tools_prefix = 'Тебе доступны следующие функции:' %}\n{%- macro __render_tool(tool) %}\n {%- set name = tool.function.name %}\n {%- set description = tool.function.description|default('') %}\n {%- set parameters = tool.function.parameters|tojson %}\n {{- '\\n' }}function {{ '{' }}'name':'{{ name }}',\n {%- if tool.function.description %}'description':'{{ description }}',{% endif %}\n'parameters':{{ parameters }}\n {{- '}' }}\n{%- endmacro %}\n{%- macro __render_tools(tools) %}\n {{- tools_prefix }}\n {%- for tool in tools %}\n {{- __render_tool(tool) }}\n {%- endfor %}\n {{- '\\n\\n' }}\n{%- endmacro %}\n{%- macro __render_tool_message(message) %}\n {{- '\\n\\nРезультат вызова' }} {{ message.name }}: {{ message.content }} {{ '\\n\\n' }}\n{%- endmacro %}\n{%- if tools -%}\n {{- __render_tools(tools) }}\n{%- endif -%}\n{%- macro __render_user_message(message) %}\n{{ names.user }} {{ message.content + '\\n\\n' }}\n{%- endmacro %}\n{%- macro __render_assistant_message(message) %}\n {{- names.assistant }}\n {%- set call = message['function_call'] %}\n {%- if call %}\n {{- '\\n[TOOL_CALL_START]' }}{{ call.name }}{{ '\\n' }}{{ call.arguments|tojson }}\n {%- else %}\n {{- ' ' + message.content + '\\n\\n' }}\n {%- endif %}\n{%- endmacro %}\n{%- if not add_generation_prompt is defined %}\n{%- set add_generation_prompt = false %}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'user' %}\n {{- __render_user_message(message) }}\n {%- endif %}\n {%- if message.role == 'assistant' and not loop.last %}\n {{- __render_assistant_message(message) }}\n {%- endif %}\n {%- if message.role == 'tool' %}\n {{- __render_tool_message(message) }}\n {%- endif %}\n {%- if loop.last %}\n {{- ' Ассистент:[SEP]' }}\n {%- endif %}\n{%- endfor %}\n",
280-
/* .expected_output= */ "<s> Пользователь: Hello\n\n Ассистент: Hi there\n\n Пользователь: Who are you\n\n Ассистент: I am an assistant \n\n Пользователь: Another question\n\n Ассистент:[SEP]",
280+
/* .expected_output= */ " Пользователь: Hello\n\n Ассистент: Hi there\n\n Пользователь: Who are you\n\n Ассистент: I am an assistant \n\n Пользователь: Another question\n\n Ассистент:[SEP]",
281281
/* .expected_output_jinja= */ "<s> Пользователь: You are a helpful assistant\nHello\n\n Ассистент: Hi there\n\n Пользователь: Who are you\n\n Ассистент: I am an assistant \n\n Пользователь: Another question\n\n Ассистент:[SEP]",
282-
/* .bos_token= */ "",
282+
/* .bos_token= */ "<s>",
283283
/* .eos_token= */ "",
284284
},
285285
{

0 commit comments

Comments
 (0)