Skip to content

Commit 9264392

Browse files
wasamtc and Server authored
fix(bug): just change kvcache and param_host name (#229)
Co-authored-by: Server <cloud@macmini-3-204.local>
1 parent fdc82a9 commit 9264392

File tree

7 files changed

+29
-29
lines changed

7 files changed

+29
-29
lines changed

src/backend/server/rpc_connection_handler.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ def node_join(self, message):
4040
# "memory_gb": 100,
4141
# "memory_bandwidth_gbps": 100,
4242
# },
43-
# "kv_cache_ratio": 0.3,
44-
# "param_hosting_ratio": 0.5,
43+
# "kvcache_mem_ratio": 0.3,
44+
# "param_mem_ratio": 0.5,
4545
# "max_concurrent_requests": 16,
4646
# "max_sequence_length": 1024,
4747
# }
@@ -162,8 +162,8 @@ def build_node(self, node_json: dict):
162162
node_id=node_json.get("node_id"),
163163
hardware=self.build_hardware(node_json.get("hardware")),
164164
model_info=self.scheduler.model_info,
165-
kv_cache_ratio=node_json.get("kv_cache_ratio"),
166-
param_hosting_ratio=node_json.get("param_hosting_ratio"),
165+
kvcache_mem_ratio=node_json.get("kvcache_mem_ratio"),
166+
param_mem_ratio=node_json.get("param_mem_ratio"),
167167
max_concurrent_requests=node_json.get("max_concurrent_requests"),
168168
max_sequence_length=node_json.get("max_sequence_length"),
169169
is_active=node_json.get("is_active", True),

src/backend/server/static_config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ def estimate_vram_gb_required(model_info):
169169
if model_info is None:
170170
return 0
171171

172-
param_hosting_ratio = 0.65
172+
param_mem_ratio = 0.65
173173
return (
174174
(
175175
model_info.embedding_io_bytes
@@ -179,7 +179,7 @@ def estimate_vram_gb_required(model_info):
179179
/ 1024
180180
/ 1024
181181
/ 1024
182-
/ param_hosting_ratio
182+
/ param_mem_ratio
183183
)
184184

185185

src/parallax/launch.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,8 @@
9191
model_name=args.model_path,
9292
max_batch_size=args.max_batch_size,
9393
max_sequence_length=args.max_sequence_length,
94-
param_hosting_ratio=args.param_hosting_ratio,
95-
kv_cache_ratio=args.kv_cache_ratio,
94+
param_mem_ratio=args.param_mem_ratio,
95+
kvcache_mem_ratio=args.kvcache_mem_ratio,
9696
)
9797
if gradient_server is not None:
9898
gradient_server.status = ServerState.READY
@@ -131,8 +131,8 @@
131131
model_name=args.model_path,
132132
max_batch_size=args.max_batch_size,
133133
max_sequence_length=args.max_sequence_length,
134-
param_hosting_ratio=args.param_hosting_ratio,
135-
kv_cache_ratio=args.kv_cache_ratio,
134+
param_mem_ratio=args.param_mem_ratio,
135+
kvcache_mem_ratio=args.kvcache_mem_ratio,
136136
)
137137
args.start_layer = gradient_server.block_start_index
138138
args.end_layer = gradient_server.block_end_index

src/parallax/p2p/server.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -210,8 +210,8 @@ def __init__(
210210
model_name: Optional[str] = None,
211211
max_batch_size: Optional[int] = None,
212212
max_sequence_length: Optional[int] = None,
213-
param_hosting_ratio: float = 0.65,
214-
kv_cache_ratio: float = 0.25,
213+
param_mem_ratio: float = 0.65,
214+
kvcache_mem_ratio: float = 0.25,
215215
):
216216
self.recv_from_peer_addr = recv_from_peer_addr
217217
self.send_to_peer_addr = send_to_peer_addr
@@ -230,8 +230,8 @@ def __init__(
230230
self.model_name = model_name
231231
self.max_batch_size = max_batch_size
232232
self.max_sequence_length = max_sequence_length
233-
self.param_hosting_ratio = param_hosting_ratio
234-
self.kv_cache_ratio = kv_cache_ratio
233+
self.param_mem_ratio = param_mem_ratio
234+
self.kvcache_mem_ratio = kvcache_mem_ratio
235235
self.prefix_id = f"{dht_prefix}_announce"
236236
self.lattica = None
237237
self.routing_table = None
@@ -686,8 +686,8 @@ def get_node_info(self, is_update: bool = False):
686686
info = {
687687
"node_id": self.lattica.peer_id(),
688688
"hardware": detect_node_hardware(self.lattica.peer_id()),
689-
"kv_cache_ratio": self.kv_cache_ratio,
690-
"param_hosting_ratio": self.param_hosting_ratio,
689+
"kvcache_mem_ratio": self.kvcache_mem_ratio,
690+
"param_mem_ratio": self.param_mem_ratio,
691691
"max_concurrent_requests": self.max_batch_size,
692692
"max_sequence_length": (
693693
1024 if self.max_sequence_length is None else self.max_sequence_length
@@ -753,8 +753,8 @@ def launch_p2p_server(
753753
model_name: Optional[str],
754754
max_batch_size: Optional[int] = None,
755755
max_sequence_length: Optional[int] = None,
756-
param_hosting_ratio: float = 0.65,
757-
kv_cache_ratio: float = 0.25,
756+
param_mem_ratio: float = 0.65,
757+
kvcache_mem_ratio: float = 0.25,
758758
):
759759
server = GradientServer(
760760
recv_from_peer_addr=recv_from_peer_addr,
@@ -774,8 +774,8 @@ def launch_p2p_server(
774774
model_name=model_name,
775775
max_batch_size=max_batch_size,
776776
max_sequence_length=max_sequence_length,
777-
param_hosting_ratio=param_hosting_ratio,
778-
kv_cache_ratio=kv_cache_ratio,
777+
param_mem_ratio=param_mem_ratio,
778+
kvcache_mem_ratio=kvcache_mem_ratio,
779779
)
780780
# Start the server
781781
thread = threading.Thread(target=server.run, daemon=True)

src/parallax/server/server_args.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,14 +61,14 @@ def parse_args() -> argparse.Namespace:
6161
)
6262

6363
parser.add_argument(
64-
"--param-hosting-ratio",
64+
"--param-mem-ratio",
6565
type=float,
6666
default=0.65,
6767
help="Ratio of GPU memory to use for parameter hosting",
6868
)
6969

7070
parser.add_argument(
71-
"--kv-cache-ratio",
71+
"--kvcache-mem-ratio",
7272
type=float,
7373
default=0.25,
7474
help="Ratio of GPU memory to use for KV cache",

src/scheduling/node.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -174,8 +174,8 @@ class Node:
174174
hardware: NodeHardwareInfo
175175
model_info: ModelInfo
176176

177-
kv_cache_ratio: float = 0.3
178-
param_hosting_ratio: float = 0.5
177+
kvcache_mem_ratio: float = 0.3
178+
param_mem_ratio: float = 0.5
179179

180180
max_concurrent_requests: int = 16
181181
max_sequence_length: int = 4096
@@ -221,7 +221,7 @@ def max_requests(self) -> int:
221221
requested_max_batch_size=self.max_concurrent_requests,
222222
max_sequence_len=self.max_sequence_length,
223223
device=None,
224-
kv_cache_memory_fraction=self.kv_cache_ratio,
224+
kv_cache_memory_fraction=self.kvcache_mem_ratio,
225225
num_shard_layers=self.num_current_layers,
226226
num_key_value_heads=self.model_info.num_kv_heads,
227227
head_dim=self.model_info.head_size,
@@ -278,7 +278,7 @@ def get_decoder_layer_capacity(
278278
* 1024
279279
* 1024
280280
* 1024
281-
* self.param_hosting_ratio
281+
* self.param_mem_ratio
282282
)
283283
if include_input_embed:
284284
available_memory_bytes -= self.model_info.embedding_io_bytes
@@ -312,7 +312,7 @@ def per_decoder_layer_kv_cache_memory(self) -> Optional[int]:
312312
* 1024
313313
* 1024
314314
* 1024
315-
* self.kv_cache_ratio
315+
* self.kvcache_mem_ratio
316316
)
317317
/ self.num_current_layers
318318
)

src/scheduling/scheduler.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -296,8 +296,8 @@ def join(self, node: Node, bootstrap: bool = False) -> None:
296296
logger.debug(
297297
"Joining node %s (kv_ratio=%.2f, param_ratio=%.2f, manual_assignment=%s)",
298298
node.node_id,
299-
node.kv_cache_ratio,
300-
node.param_hosting_ratio,
299+
node.kvcache_mem_ratio,
300+
node.param_mem_ratio,
301301
node.manual_layer_assignment,
302302
)
303303
self.layer_allocator.declare(node)

0 commit comments

Comments (0)