jd-opensource
diff --git a/‎xllm/api_service/CMakeLists.txt‎
Lines changed: 2 additions & 0 deletions b/‎xllm/api_service/CMakeLists.txt‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎xllm/api_service/api_service.cpp‎
Lines changed: 71 additions & 49 deletions b/‎xllm/api_service/api_service.cpp‎
Lines changed: 71 additions & 49 deletions
diff --git a/‎xllm/api_service/api_service.h‎
Lines changed: 2 additions & 0 deletions b/‎xllm/api_service/api_service.h‎
Lines changed: 2 additions & 0 deletions
@@ -8,6 +8,7 @@ cc_library(
     api_service_impl.h
     call.h
     completion_service_impl.h
+    rec_completion_service_impl.h
     chat_service_impl.h
     embedding_service_impl.h
     image_generation_service_impl.h
@@ -23,6 +24,7 @@ cc_library(
     api_service.cpp
     call.cpp
     completion_service_impl.cpp
+    rec_completion_service_impl.cpp
     chat_service_impl.cpp
     embedding_service_impl.cpp
     image_generation_service_impl.cpp
 
@@ -27,6 +27,8 @@ limitations under the License.
 #include "core/common/metrics.h"
 #include "core/runtime/dit_master.h"
 #include "core/runtime/llm_master.h"
+// TODO: add the following include in the next PR.
+// #include "core/runtime/rec_master.h"
 #include "core/runtime/vlm_master.h"
 #include "core/util/closure_guard.h"
 #include "embedding.pb.h"
@@ -70,6 +72,11 @@ APIService::APIService(Master* master,
     image_generation_service_impl_ =
         std::make_unique<ImageGenerationServiceImpl>(
             dynamic_cast<DiTMaster*>(master), model_names);
+  } else if (FLAGS_backend == "rec") {
+    // TODO. delete this when next pr.
+    using RecMaster = LLMMaster;
+    rec_completion_service_impl_ = std::make_unique<RecCompletionServiceImpl>(
+        dynamic_cast<RecMaster*>(master), model_names);
   }
   models_service_impl_ =
       ServiceImplFactory<ModelsServiceImpl>::create_service_impl(
@@ -80,81 +87,97 @@ void APIService::Completions(::google::protobuf::RpcController* controller,
                              const proto::CompletionRequest* request,
                              proto::CompletionResponse* response,
                              ::google::protobuf::Closure* done) {
-  // TODO with xllm-service
-}
-
-void APIService::CompletionsHttp(::google::protobuf::RpcController* controller,
-                                 const proto::HttpRequest* request,
-                                 proto::HttpResponse* response,
-                                 ::google::protobuf::Closure* done) {
+  // Protobuf (non-HTTP) entry point for text completions. The request and
+  // response messages supplied by the RPC layer are forwarded as-is to the
+  // completion service that matches FLAGS_backend.
+  // NOTE(review): done_guard is presumably what completes the RPC on every
+  // path that does not call release() - confirm against xllm::ClosureGuard.
   xllm::ClosureGuard done_guard(
       done,
       std::bind(request_in_metric, nullptr),
       std::bind(request_out_metric, (void*)controller));
   if (!request || !response || !controller) {
-    LOG(ERROR) << "brpc request | respose | controller is null";
+    LOG(ERROR) << "brpc request | respose | controller is null.";
     return;
   }
+  // Downcast from the protobuf base class to the brpc controller; static_cast
+  // is the correct cast for a base-to-derived conversion.
+  auto ctrl = static_cast<brpc::Controller*>(controller);
 
-  auto arena = response->GetArena();
+  // Built lazily so that `done` is only taken out of the guard once a backend
+  // has actually been selected.
+  auto make_call = [&]() -> std::shared_ptr<Call> {
+    return std::make_shared<CompletionCall>(
+        ctrl,
+        done_guard.release(),
+        const_cast<proto::CompletionRequest*>(request),
+        response);
+  };
+
+  if (FLAGS_backend == "llm" || FLAGS_backend == "vlm") {
+    CHECK(completion_service_impl_) << " completion service is invalid.";
+    std::shared_ptr<Call> call = make_call();
+    completion_service_impl_->process_async(call);
+  } else if (FLAGS_backend == "rec") {
+    CHECK(rec_completion_service_impl_)
+        << " rec completion service is invalid.";
+    std::shared_ptr<Call> call = make_call();
+    rec_completion_service_impl_->process_async(call);
+  } else {
+    // No completion service exists for this backend. Report the failure
+    // instead of returning an empty "successful" response; done_guard was not
+    // released, so it remains responsible for `done`.
+    const std::string reason =
+        "Completions is not supported by backend: " + FLAGS_backend;
+    LOG(ERROR) << reason;
+    ctrl->SetFailed(reason);
+  }
+}
+
+namespace {
+// Shared HTTP helper for the completion-style endpoints.
+//
+// Parses the JSON body held in `ctrl->request_attachment()` into a
+// `Call::ReqType` message, creates the matching `Call::ResType` message (both
+// through `Arena::CreateMessage` on `arena`), wraps them in a `Call` and
+// hands that call to `service->process_async()`.
+//
+// On a parse failure the controller is marked failed and the function returns
+// without calling `guard.release()`, so `done` stays with the caller's guard.
+//
+// NOTE(review): the ChatCompletionsImpl helper this replaces parsed with
+// JsonStringToMessage (ignore_unknown_fields = true) and passed
+// `arena != nullptr` to the call constructor as `use_arena`. Neither happens
+// here; please confirm that the chat call types behave correctly when `arena`
+// is nullptr (the vlm path) and that json2pb's handling of unknown JSON
+// fields is acceptable for chat requests.
+template <typename Call, typename Service>
+void CommonCompletionsImpl(std::unique_ptr<Service>& service,
+                           xllm::ClosureGuard& guard,
+                           ::google::protobuf::Arena* arena,
+                           brpc::Controller* ctrl) {
   auto req_pb =
-      google::protobuf::Arena::CreateMessage<proto::CompletionRequest>(arena);
+      google::protobuf::Arena::CreateMessage<typename Call::ReqType>(arena);
   auto resp_pb =
-      google::protobuf::Arena::CreateMessage<proto::CompletionResponse>(arena);
+      google::protobuf::Arena::CreateMessage<typename Call::ResType>(arena);
 
-  auto ctrl = reinterpret_cast<brpc::Controller*>(controller);
   std::string error;
   json2pb::Json2PbOptions options;
   butil::IOBuf& buf = ctrl->request_attachment();
   butil::IOBufAsZeroCopyInputStream iobuf_stream(buf);
   auto st = json2pb::JsonToProtoMessage(&iobuf_stream, req_pb, options, &error);
   if (!st) {
     ctrl->SetFailed(error);
-    LOG(ERROR) << "parse json to proto failed: " << error;
+    // Keep the parser's error text in the log; the raw body alone does not
+    // say why parsing failed.
+    LOG(ERROR) << "parse json to proto failed: " << error
+               << ", request: " << buf.to_string();
     return;
   }
 
-  std::shared_ptr<Call> call = std::make_shared<CompletionCall>(
-      ctrl, done_guard.release(), req_pb, resp_pb);
-  completion_service_impl_->process_async(call);
+  // Parsing succeeded: take `done` out of the guard and hand it to the call.
+  auto call = std::make_shared<Call>(ctrl, guard.release(), req_pb, resp_pb);
+  service->process_async(call);
 }
+}  // namespace
 
-void APIService::ChatCompletions(::google::protobuf::RpcController* controller,
-                                 const proto::ChatRequest* request,
-                                 proto::ChatResponse* response,
+// HTTP entry point for text completions. The JSON body is parsed by
+// CommonCompletionsImpl and dispatched to the completion service that matches
+// FLAGS_backend.
+void APIService::CompletionsHttp(::google::protobuf::RpcController* controller,
+                                 const proto::HttpRequest* request,
+                                 proto::HttpResponse* response,
                                  ::google::protobuf::Closure* done) {
-  // TODO with xllm-service
-}
-
-namespace {
-template <typename ChatCall, typename Service>
-void ChatCompletionsImpl(std::unique_ptr<Service>& service,
-                         xllm::ClosureGuard& guard,
-                         ::google::protobuf::Arena* arena,
-                         brpc::Controller* ctrl) {
-  auto req_pb =
-      google::protobuf::Arena::CreateMessage<typename ChatCall::ReqType>(arena);
-  auto resp_pb =
-      google::protobuf::Arena::CreateMessage<typename ChatCall::ResType>(arena);
+  // NOTE(review): done_guard is presumably what completes the RPC on every
+  // path that does not call release() - confirm against xllm::ClosureGuard.
+  xllm::ClosureGuard done_guard(
+      done,
+      std::bind(request_in_metric, nullptr),
+      std::bind(request_out_metric, (void*)controller));
+  if (!request || !response || !controller) {
+    LOG(ERROR) << "brpc request | respose | controller is null";
+    return;
+  }
 
-  std::string attachment = std::move(ctrl->request_attachment().to_string());
-  std::string error;
+  auto arena = response->GetArena();
+  // Downcast from the protobuf base class to the brpc controller; static_cast
+  // is the correct cast for a base-to-derived conversion.
+  auto ctrl = static_cast<brpc::Controller*>(controller);
 
-  google::protobuf::util::JsonParseOptions options;
-  options.ignore_unknown_fields = true;
-  auto json_status =
-      google::protobuf::util::JsonStringToMessage(attachment, req_pb, options);
-  if (!json_status.ok()) {
-    ctrl->SetFailed(json_status.ToString());
-    LOG(ERROR) << "parse json to proto failed: " << json_status.ToString();
-    return;
+  if (FLAGS_backend == "llm" || FLAGS_backend == "vlm") {
+    CHECK(completion_service_impl_) << " completion service is invalid.";
+    CommonCompletionsImpl<CompletionCall, CompletionServiceImpl>(
+        completion_service_impl_, done_guard, arena, ctrl);
+  } else if (FLAGS_backend == "rec") {
+    CHECK(rec_completion_service_impl_)
+        << " rec completion service is invalid.";
+    CommonCompletionsImpl<CompletionCall, RecCompletionServiceImpl>(
+        rec_completion_service_impl_, done_guard, arena, ctrl);
+  } else {
+    // No completion service exists for this backend. Report the failure
+    // instead of returning an empty "successful" response; done_guard was not
+    // released, so it remains responsible for `done`.
+    const std::string reason =
+        "Completions is not supported by backend: " + FLAGS_backend;
+    LOG(ERROR) << reason;
+    ctrl->SetFailed(reason);
   }
+}
 
-  auto call = std::make_shared<ChatCall>(
-      ctrl, guard.release(), req_pb, resp_pb, arena != nullptr /*use_arena*/);
-  service->process_async(call);
+// Protobuf (non-HTTP) entry point for chat completions. Not implemented yet.
+void APIService::ChatCompletions(::google::protobuf::RpcController* controller,
+                                 const proto::ChatRequest* request,
+                                 proto::ChatResponse* response,
+                                 ::google::protobuf::Closure* done) {
+  // TODO with xllm-service
+  // Until the real implementation lands, fail the call explicitly and still
+  // run `done`: an RPC handler that never runs its closure leaves the request
+  // unanswered.
+  if (controller) {
+    controller->SetFailed("ChatCompletions is not implemented yet.");
+  }
+  if (done) {
+    done->Run();
+  }
 }
-}  // namespace
 
 void APIService::ChatCompletionsHttp(
     ::google::protobuf::RpcController* controller,
@@ -175,12 +198,11 @@ void APIService::ChatCompletionsHttp(
   if (FLAGS_backend == "llm") {
     auto arena = response->GetArena();
     CHECK(chat_service_impl_) << " chat service is invalid.";
-    ChatCompletionsImpl<ChatCall, ChatServiceImpl>(
+    CommonCompletionsImpl<ChatCall, ChatServiceImpl>(
         chat_service_impl_, done_guard, arena, ctrl);
   } else if (FLAGS_backend == "vlm") {
     CHECK(mm_chat_service_impl_) << " mm chat service is invalid.";
-    // TODO: fix me - temporarily using heap allocation instead of arena
-    ChatCompletionsImpl<MMChatCall, MMChatServiceImpl>(
+    CommonCompletionsImpl<MMChatCall, MMChatServiceImpl>(
         mm_chat_service_impl_, done_guard, nullptr, ctrl);
   }
 }
 
@@ -21,6 +21,7 @@ limitations under the License.
 #include "image_generation_service_impl.h"
 #include "models_service_impl.h"
 #include "qwen3_rerank_service_impl.h"
+#include "rec_completion_service_impl.h"
 #include "rerank_service_impl.h"
 #include "xllm_service.pb.h"
 
@@ -124,6 +125,7 @@ class APIService : public proto::XllmAPIService {
   std::unique_ptr<ModelsServiceImpl> models_service_impl_;
   std::unique_ptr<ImageGenerationServiceImpl> image_generation_service_impl_;
   std::unique_ptr<RerankServiceImpl> rerank_service_impl_;
+  std::unique_ptr<RecCompletionServiceImpl> rec_completion_service_impl_;
 };
 
 }  // namespace xllm