
Commit 2409189

[API] Add ML LXM Service API (internal) for large model interactions
This commit introduces the ML LXM Service API, a new C API designed to facilitate interactions with large-scale models such as Large Language Models (LLMs).

Signed-off-by: hyunil park <hyunil46.park@samsung.com>
1 parent e4d25a8 commit 2409189

File tree

3 files changed: +550 -1 lines changed

ml-lxm-service-internal.h

Lines changed: 273 additions & 0 deletions
@@ -0,0 +1,273 @@
/* SPDX-License-Identifier: Apache-2.0 */
/**
 * @file ml-lxm-service-internal.h
 * @date 23 JULY 2025
 * @brief Machine Learning LXM(LLM, LVM, etc.) Service API
 * @see https://github.com/nnstreamer/api
 * @author Hyunil Park <hyunil46.park@samsung.com>
 * @bug No known bugs except for NYI items
 */

/**
 * @example sample_lxm_service.c
 * @brief Sample application demonstrating ML LXM Service API usage
 *
 * This sample shows how to:
 * - Create and configure an LXM session
 * - Build prompts with text and instructions
 * - Generate streaming responses with custom options
 * - Handle token callbacks for real-time processing
 *
 * Configuration file example (config.json):
 * @code
 * {
 *   "single" :
 *   {
 *     "framework" : "flare",
 *     "model" : ["sflare_if_4bit_3b.bin"],
 *     "adapter" : ["history_lora.bin"],
 *     "custom" : "tokenizer_path:tokenizer.json,backend:CPU,output_size:1024,model_type:3B,data_type:W4A32",
 *     "invoke_dynamic" : "true"
 *   }
 * }
 * @endcode
 *
 * Basic usage workflow:
 * @code
 * // 1. Create session
 * ml_lxm_session_h session;
 * ml_lxm_session_create(&session, "/path/to/config.json", NULL);
 *
 * // 2. Create prompt
 * ml_lxm_prompt_h prompt;
 * ml_lxm_prompt_create(&prompt);
 * ml_lxm_prompt_append_text(prompt, "Hello AI");
 *
 * // 3. Generate response with options
 * ml_lxm_generation_options_s options = {1.0, 50};
 * ml_lxm_session_respond(session, prompt, &options, token_handler, NULL);
 *
 * // 4. Cleanup
 * ml_lxm_prompt_destroy(prompt);
 * ml_lxm_session_destroy(session);
 * @endcode
 *
 * Complete example with token callback:
 * @code
 * #include "ml-lxm-service-internal.h"
 * #include <iostream>
 *
 * static void token_handler(ml_service_event_e event,
 *                           ml_information_h event_data,
 *                           void *user_data);
 *
 * int main() {
 *   ml_lxm_session_h session = NULL;
 *   ml_lxm_prompt_h prompt = NULL;
 *   int ret;
 *
 *   // Check availability first
 *   ml_lxm_availability_e status;
 *   ret = ml_lxm_check_availability(&status);
 *   if (ret != ML_ERROR_NONE || status != ML_LXM_AVAILABILITY_AVAILABLE) {
 *     std::cout << "LXM service not available" << std::endl;
 *     return -1;
 *   }
 *
 *   // 1. Create session with config and instructions
 *   ret = ml_lxm_session_create(&session, "/path/to/config.json", "You are a helpful AI assistant");
 *   if (ret != ML_ERROR_NONE) {
 *     std::cout << "Failed to create session" << std::endl;
 *     return -1;
 *   }
 *
 *   // 2. Create prompt
 *   ret = ml_lxm_prompt_create(&prompt);
 *   if (ret != ML_ERROR_NONE) {
 *     std::cout << "Failed to create prompt" << std::endl;
 *     ml_lxm_session_destroy(session);
 *     return -1;
 *   }
 *
 *   // Add text to prompt
 *   ret = ml_lxm_prompt_append_text(prompt, "Explain quantum computing in simple terms");
 *   if (ret != ML_ERROR_NONE) {
 *     std::cout << "Failed to append text to prompt" << std::endl;
 *     ml_lxm_prompt_destroy(prompt);
 *     ml_lxm_session_destroy(session);
 *     return -1;
 *   }
 *
 *   // 3. Generate response with custom options
 *   ml_lxm_generation_options_s options = {
 *     .temperature = 1.2,
 *     .max_tokens = 128
 *   };
 *
 *   std::cout << "AI Response: ";
 *   ret = ml_lxm_session_respond(session, prompt, &options, token_handler, NULL);
 *   if (ret != ML_ERROR_NONE) {
 *     std::cout << "Failed to generate response" << std::endl;
 *   }
 *   std::cout << std::endl;
 *
 *   // 4. Cleanup
 *   ml_lxm_prompt_destroy(prompt);
 *   ml_lxm_session_destroy(session);
 *
 *   return 0;
 * }
 *
 * static void token_handler(ml_service_event_e event,
 *                           ml_information_h event_data,
 *                           void *user_data) {
 *   ml_tensors_data_h data = NULL;
 *   void *_raw = NULL;
 *   size_t _size = 0;
 *   int ret;
 *
 *   switch (event) {
 *     case ML_SERVICE_EVENT_NEW_DATA:
 *       if (event_data != NULL) {
 *         ret = ml_information_get(event_data, "data", &data);
 *         if (ret == ML_ERROR_NONE) {
 *           ret = ml_tensors_data_get_tensor_data(data, 0U, &_raw, &_size);
 *           if (ret == ML_ERROR_NONE && _raw != NULL && _size > 0) {
 *             std::cout.write(static_cast<const char *>(_raw), _size);
 *             std::cout.flush();
 *           }
 *         }
 *       }
 *       break;
 *     default:
 *       break;
 *   }
 * }
 * @endcode
 */

#ifndef __ML_LXM_SERVICE_INTERNAL_H__
#define __ML_LXM_SERVICE_INTERNAL_H__

#include <stdlib.h>
#include <ml-api-service.h>
#ifdef __cplusplus
extern "C"
{
#endif

/**
 * @brief Enumeration for LXM service availability status.
 */
typedef enum
{
  ML_LXM_AVAILABILITY_AVAILABLE = 0,       /**< The LXM service is available. */
  ML_LXM_AVAILABILITY_DEVICE_NOT_ELIGIBLE, /**< The device does not meet the requirements. */
  ML_LXM_AVAILABILITY_SERVICE_DISABLED,    /**< The LXM service is disabled. */
  ML_LXM_AVAILABILITY_MODEL_NOT_READY,     /**< The model is not ready yet. */
  ML_LXM_AVAILABILITY_UNKNOWN              /**< Availability is unknown. */
} ml_lxm_availability_e;

/**
 * @brief Checks LXM service availability.
 * @param[out] status Current availability status.
 * @return ML_ERROR_NONE on success, error code otherwise.
 */
int ml_lxm_check_availability (ml_lxm_availability_e * status);
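/*
 * Illustrative usage sketch (added for this write-up, not part of the original
 * header): a plain-C availability check before creating a session.
 *
 *   ml_lxm_availability_e status;
 *   if (ml_lxm_check_availability (&status) != ML_ERROR_NONE ||
 *       status != ML_LXM_AVAILABILITY_AVAILABLE) {
 *     // The LXM service cannot be used on this device; handle the error here.
 *     return;
 *   }
 */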

/**
 * @brief A handle for an LXM session.
 */
typedef void *ml_lxm_session_h;

/**
 * @brief Creates an LXM session.
 * @param[out] session Session handle.
 * @param[in] config_path Path to configuration file.
 * @param[in] instructions Initial instructions (optional).
 * @return ML_ERROR_NONE on success.
 */
int ml_lxm_session_create (ml_lxm_session_h * session, const char *config_path, const char *instructions);

/**
 * @brief Destroys an LXM session.
 * @param[in] session Session handle.
 * @return ML_ERROR_NONE on success.
 */
int ml_lxm_session_destroy (ml_lxm_session_h session);

/**
 * @brief A handle for an LXM prompt.
 */
typedef void *ml_lxm_prompt_h;

/**
 * @brief Creates a prompt object.
 * @param[out] prompt Prompt handle.
 * @return ML_ERROR_NONE on success.
 */
int ml_lxm_prompt_create (ml_lxm_prompt_h * prompt);

/**
 * @brief Appends text to a prompt.
 * @param[in] prompt Prompt handle.
 * @param[in] text Text to append.
 * @return ML_ERROR_NONE on success.
 */
int ml_lxm_prompt_append_text (ml_lxm_prompt_h prompt, const char *text);

/**
 * @brief Appends an instruction to a prompt.
 * @param[in] prompt Prompt handle.
 * @param[in] instruction Instruction to append.
 * @return ML_ERROR_NONE on success.
 */
int ml_lxm_prompt_append_instruction (ml_lxm_prompt_h prompt, const char *instruction);
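/*
 * Illustrative usage sketch (added for this write-up, not part of the original
 * header; the instruction and question strings are hypothetical): appending an
 * instruction alongside user text in the same prompt.
 *
 *   ml_lxm_prompt_h prompt;
 *   if (ml_lxm_prompt_create (&prompt) == ML_ERROR_NONE) {
 *     ml_lxm_prompt_append_instruction (prompt, "Answer in one short sentence.");
 *     ml_lxm_prompt_append_text (prompt, "What is NNStreamer?");
 *     // ... pass the prompt to ml_lxm_session_respond () ...
 *     ml_lxm_prompt_destroy (prompt);
 *   }
 */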

/**
 * @brief Destroys a prompt object.
 * @param[in] prompt Prompt handle.
 * @return ML_ERROR_NONE on success.
 */
int ml_lxm_prompt_destroy (ml_lxm_prompt_h prompt);

/**
 * @brief Sets runtime instructions for a session.
 * @param[in] session Session handle.
 * @param[in] instructions New instructions.
 * @return ML_ERROR_NONE on success.
 */
int ml_lxm_session_set_instructions (ml_lxm_session_h session, const char *instructions);
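/*
 * Illustrative usage sketch (added for this write-up, not part of the original
 * header; the instruction string is hypothetical): updating the instructions
 * of an already-created session at runtime.
 *
 *   // Assumes 'session' was created earlier with ml_lxm_session_create ().
 *   ml_lxm_session_set_instructions (session, "From now on, answer in Korean.");
 */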

/**
 * @brief Generation options for LXM responses.
 */
typedef struct
{
  double temperature; /**< Creativity control (0.0~2.0) */
  size_t max_tokens;  /**< Maximum tokens to generate */
} ml_lxm_generation_options_s;

/**
 * @brief Token streaming callback type.
 * @param[in] event The service event type (e.g., ML_SERVICE_EVENT_NEW_DATA for a newly generated token).
 * @param[in] event_data Event data; holds the generated token for new-data events.
 * @param[in] user_data User-defined context.
 */
typedef void (*ml_lxm_token_cb) (ml_service_event_e event, ml_information_h event_data, void *user_data);

/**
 * @brief Generates a token-streamed response.
 * @param[in] session Session handle.
 * @param[in] prompt Prompt handle.
 * @param[in] options Generation parameters.
 * @param[in] token_callback Callback for each generated token.
 * @param[in] user_data User context passed to callback.
 * @return ML_ERROR_NONE on success.
 */
int ml_lxm_session_respond (ml_lxm_session_h session, ml_lxm_prompt_h prompt, const ml_lxm_generation_options_s * options, ml_lxm_token_cb token_callback, void *user_data);

#ifdef __cplusplus
}
#endif
#endif
/* __ML_LXM_SERVICE_INTERNAL_H__ */

c/src/meson.build

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
nns_capi_common_srcs = files('ml-api-common.c', 'ml-api-inference-internal.c')
nns_capi_single_srcs = files('ml-api-inference-single.c')
nns_capi_pipeline_srcs = files('ml-api-inference-pipeline.c')
-nns_capi_service_srcs = files('ml-api-service.c', 'ml-api-service-extension.c', 'ml-api-service-agent-client.c')
+nns_capi_service_srcs = files('ml-api-service.c', 'ml-api-service-extension.c', 'ml-api-service-agent-client.c', 'ml-lxm-service.c')

if support_nnstreamer_edge
  nns_capi_service_srcs += files('ml-api-service-query.c')
