@@ -31,65 +31,10 @@ namespace xllm {
3131
// NOTE(review): this region is a rendered diff hunk, not plain source. Every
// line starts with fused old/new line numbers; '-' marks a line the change
// removes and '+' marks a line the change adds.
// Net effect on the constructor: the single-node branch is dropped, so it
// calls setup_multi_node_workers() when master_node_addr is non-empty and
// otherwise emits a FATAL-severity log message.
3232DistManager::DistManager (const runtime::Options& options) {
3333 auto master_node_addr = options.master_node_addr ().value_or (" " );
34- // Single-Node Worker Mode
35- if (master_node_addr.empty ()) {
36- setup_single_node_workers (options);
37- } else {
38- // Multi-node Worker Mode
34+ if (!master_node_addr.empty ()) {
3935 setup_multi_node_workers (options, master_node_addr);
40- }
41- }
42-
// Removed by this change: setup_single_node_workers(). It created process
// groups for options.devices() and then one Worker plus one WorkerClient
// (wrapping that Worker's raw pointer) per device.
43- void DistManager::setup_single_node_workers (const runtime::Options& options) {
44- const auto & devices = options.devices ();
// The device count has to be an exact multiple of dp_size, otherwise the
// per-group slicing below would not cover the devices evenly.
45- CHECK_EQ ((devices.size () % options.dp_size ()), 0 )
46- << " Device size must be divisible by dp size in single-node serving "
47- " mode." ;
48-
49- // initialize process groups if there are multiple devices
50- if (devices.size () > 1 ) {
51- // create a process group for each device if there are multiple gpus
52- process_groups_ = parallel_state::create_npu_process_groups (devices);
53- }
54-
// Number of devices assigned to each dp group.
55- const int32_t dp_local_tp_size = devices.size () / options.dp_size ();
// Per-group process groups are only built when 1 < dp_size < devices.size();
// group dp_rank covers the contiguous device slice
// [dp_rank * dp_local_tp_size, (dp_rank + 1) * dp_local_tp_size).
56- if (options.dp_size () > 1 && options.dp_size () < devices.size ()) {
57- dp_local_process_groups_.reserve (options.dp_size ());
58- for (size_t dp_rank = 0 ; dp_rank < options.dp_size (); ++dp_rank) {
59- auto dp_local_group_device_begin_idx = devices.begin ();
60- std::advance (dp_local_group_device_begin_idx, dp_rank * dp_local_tp_size);
61- auto dp_local_group_device_end_idx = devices.begin ();
62- std::advance (dp_local_group_device_end_idx,
63- (dp_rank + 1 ) * dp_local_tp_size);
64- std::vector<torch::Device> dp_local_group_devices;
65- std::copy (dp_local_group_device_begin_idx,
66- dp_local_group_device_end_idx,
67- std::back_inserter (dp_local_group_devices));
68- dp_local_process_groups_.emplace_back (
69- parallel_state::create_npu_process_groups (dp_local_group_devices));
70- }
71- }
72-
73- // create a worker(as worker client also) for each device
74- const int32_t world_size = static_cast <int32_t >(devices.size ());
// Worker flavour depends only on the task type: LLM for the generate task,
// ELM for anything else.
75- WorkerType worker_type =
76- (options.task_type () == " generate" ) ? WorkerType::LLM : WorkerType::ELM;
77- for (size_t i = 0 ; i < devices.size (); ++i) {
78- const int32_t rank = static_cast <int32_t >(i);
// pg stays null for a single device, matching the guard above under which
// process_groups_ is only populated when there is more than one device.
79- ProcessGroup* pg = world_size > 1 ? process_groups_[i].get () : nullptr ;
80- // dp local process groups
// Same condition as the block that fills dp_local_process_groups_; device i
// maps to group i / dp_local_tp_size, slot i % dp_local_tp_size.
81- ProcessGroup* dp_local_pg =
82- (options.dp_size () > 1 && options.dp_size () < world_size)
83- ? (dp_local_process_groups_[i / dp_local_tp_size]
84- [i % dp_local_tp_size])
85- .get ()
86- : nullptr ;
87- ParallelArgs parallel_args (
88- rank, world_size, pg, dp_local_pg, options.dp_size ());
89- workers_.emplace_back (std::make_unique<Worker>(
90- parallel_args, devices[i], options, worker_type));
// The client holds a non-owning pointer to the Worker just stored in workers_.
91- worker_clients_.emplace_back (
92- std::make_unique<WorkerClient>(workers_.back ().get ()));
// Added by this change: an empty master_node_addr no longer selects the
// single-node path; it is reported at FATAL severity instead.
36+ } else {
37+ LOG (FATAL) << " master_node_addr is empty." ;
9338 }
9439}
9540
@@ -166,10 +111,17 @@ void DistManager::setup_multi_node_workers(
166111
167112 runtime::Options worker_server_options = options;
168113 worker_server_options.world_size (world_size);
169-
170- WorkerType worker_type =
171- (options.task_type () == " generate" ) ? WorkerType::LLM : WorkerType::ELM;
172-
114+ WorkerType worker_type (" LLM" );
115+ const auto & model_backend = options.backend ();
116+ if (model_backend == " llm" ) {
117+ worker_type =
118+ (options.task_type () == " generate" ) ? WorkerType::LLM : WorkerType::ELM;
119+ } else if (model_backend == " vlm" ) {
120+ worker_type = (options.task_type () == " generate" ) ? WorkerType::VLM
121+ : WorkerType::EVLM;
122+ } else {
123+ LOG (ERROR) << " Unsupported " << model_backend << " in multi-node." ;
124+ }
173125 // create local workers
174126 for (size_t i = 0 ; i < devices.size (); ++i) {
175127 // worldsize = 8
0 commit comments