Commit f69e083

dummy import fd (#5192)
1 parent 8e4e3ff commit f69e083

File tree

1 file changed: +93, -63 lines

  • fastdeploy/cache_manager/ops.py


fastdeploy/cache_manager/ops.py

Lines changed: 93 additions & 63 deletions
@@ -1,77 +1,107 @@
+"""
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
 import paddle
 
 from fastdeploy.platforms import current_platform
 
-if current_platform.is_cuda():
-    from fastdeploy.model_executor.ops.gpu import (
-        cuda_host_alloc,
-        cuda_host_free,
-        get_data_ptr_ipc,
-        get_output_kv_signal,
-        ipc_sent_key_value_cache_by_remote_ptr,
-        ipc_sent_key_value_cache_by_remote_ptr_block_sync,
-        set_data_ipc,
-        share_external_data,
-        swap_cache_all_layers,
-        unset_data_ipc,
-    )
-
-    memory_allocated = paddle.device.cuda.memory_allocated
-
-    def get_peer_mem_addr(*args, **kwargs):
-        raise RuntimeError("CUDA has no need of get_peer_mem_addr!")
-
-elif current_platform.is_xpu():
-    from fastdeploy.model_executor.ops.xpu import (
-        cuda_host_alloc,
-        cuda_host_free,
-        get_output_kv_signal,
-        get_peer_mem_addr,
-        set_data_ipc,
-        share_external_data,
-        swap_cache_all_layers,
-    )
-
-    unset_data_ipc = None
-    memory_allocated = paddle.device.xpu.memory_allocated
-
-    def get_data_ptr_ipc(*args, **kwargs):
-        raise RuntimeError("XPU get_data_ptr_ipc UNIMPLEMENTED!")
-
-    def ipc_sent_key_value_cache_by_remote_ptr(*args, **kwargs):
-        raise RuntimeError("XPU ipc_sent_key_value_cache_by_remote_ptr UNIMPLEMENTED")
-
-    def ipc_sent_key_value_cache_by_remote_ptr_block_sync(*args, **kwargs):
-        raise RuntimeError("XPU ipc_sent_key_value_cache_by_remote_ptr_block_sync UNIMPLEMENTED")
-
-else:
-    raise RuntimeError("Prefix cache ops are only supported on CUDA or XPU platforms")
-
-
-def set_device(device):
-    if current_platform.is_cuda():
-        paddle.set_device(f"gpu:{device}")
-    elif current_platform.is_xpu():
-        paddle.set_device(f"xpu:{device}")
-    else:
-        raise RuntimeError("No supported platform")
-
-
-def share_external_data_(cache, cache_name, cache_shape, use_ipc):
-    if current_platform.is_cuda():
-        cache = share_external_data(cache, cache_name, cache_shape)
-    elif current_platform.is_xpu():
-        cache = share_external_data(cache, cache_name, cache_shape, use_ipc)
-    else:
-        raise RuntimeError("No supported platform")
-    return cache
-
-
-def get_all_visible_devices():
-    if current_platform.is_xpu():
-        return "XPU_VISIBLE_DEVICES=0,1,2,3,4,5,6,7"
-    else:
-        return "CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7"
+try:
+    if current_platform.is_cuda():
+        from fastdeploy.model_executor.ops.gpu import (
+            cuda_host_alloc,
+            cuda_host_free,
+            get_data_ptr_ipc,
+            get_output_kv_signal,
+            ipc_sent_key_value_cache_by_remote_ptr,
+            ipc_sent_key_value_cache_by_remote_ptr_block_sync,
+            set_data_ipc,
+            share_external_data,
+            swap_cache_all_layers,
+            unset_data_ipc,
+        )
+
+        memory_allocated = paddle.device.cuda.memory_allocated
+
+        def get_peer_mem_addr(*args, **kwargs):
+            raise RuntimeError("CUDA has no need of get_peer_mem_addr!")
+
+    elif current_platform.is_xpu():
+        from fastdeploy.model_executor.ops.xpu import (
+            cuda_host_alloc,
+            cuda_host_free,
+            get_output_kv_signal,
+            get_peer_mem_addr,
+            set_data_ipc,
+            share_external_data,
+            swap_cache_all_layers,
+        )
+
+        unset_data_ipc = None
+        memory_allocated = paddle.device.xpu.memory_allocated
+
+        def get_data_ptr_ipc(*args, **kwargs):
+            raise RuntimeError("XPU get_data_ptr_ipc UNIMPLEMENTED!")
+
+        def ipc_sent_key_value_cache_by_remote_ptr(*args, **kwargs):
+            raise RuntimeError("XPU ipc_sent_key_value_cache_by_remote_ptr UNIMPLEMENTED")
+
+        def ipc_sent_key_value_cache_by_remote_ptr_block_sync(*args, **kwargs):
+            raise RuntimeError("XPU ipc_sent_key_value_cache_by_remote_ptr_block_sync UNIMPLEMENTED")
+
+    else:
+        raise RuntimeError("Prefix cache ops are only supported on CUDA or XPU platforms")
+
+    def set_device(device):
+        if current_platform.is_cuda():
+            paddle.set_device(f"gpu:{device}")
+        elif current_platform.is_xpu():
+            paddle.set_device(f"xpu:{device}")
+        else:
+            raise RuntimeError("No supported platform")
+
+    def share_external_data_(cache, cache_name, cache_shape, use_ipc):
+        if current_platform.is_cuda():
+            cache = share_external_data(cache, cache_name, cache_shape)
+        elif current_platform.is_xpu():
+            cache = share_external_data(cache, cache_name, cache_shape, use_ipc)
+        else:
+            raise RuntimeError("No supported platform")
+        return cache
+
+    def get_all_visible_devices():
+        if current_platform.is_xpu():
+            return "XPU_VISIBLE_DEVICES=0,1,2,3,4,5,6,7"
+        else:
+            return "CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7"
+
+except:
+    cuda_host_alloc = None
+    cuda_host_free = None
+    set_data_ipc = None
+    share_external_data_ = None
+    swap_cache_all_layers = None
+    unset_data_ipc = None
+    set_device = None
+    memory_allocated = None
+    get_output_kv_signal = None
+    get_data_ptr_ipc = None
+    ipc_sent_key_value_cache_by_remote_ptr = None
+    ipc_sent_key_value_cache_by_remote_ptr_block_sync = None
+    get_peer_mem_addr = None
+    get_all_visible_devices = None
 
 
 __all__ = [

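The change above replaces a hard platform check at import time with a try/except fallback: if the platform-specific ops cannot be imported, every exported symbol is bound to None instead of the module failing to import (the "dummy import" of the commit title). Below is a minimal sketch of the resulting pattern; the require_op helper and the __main__ usage are illustrative, not part of the diff, and the sketch uses `except Exception:` where the commit uses a bare `except:`.

    # Minimal sketch of the fallback-import pattern introduced above.
    # Only the import behavior is modeled here; the real op names,
    # signatures, and platform checks live in fastdeploy.model_executor.ops.
    try:
        # On a CUDA build this import succeeds and the real op is bound.
        from fastdeploy.model_executor.ops.gpu import cuda_host_alloc
    except Exception:  # the commit catches everything with a bare `except:`
        cuda_host_alloc = None  # placeholder: the import itself never fails


    def require_op(op, name):
        """Hypothetical guard a caller could use before touching an op."""
        if op is None:
            raise RuntimeError(f"{name} is unavailable on this platform/build")
        return op


    # Usage: importing this module is always safe; the error is deferred
    # until the op is actually needed.
    if __name__ == "__main__":
        try:
            require_op(cuda_host_alloc, "cuda_host_alloc")
            print("prefix-cache ops available")
        except RuntimeError as exc:
            print(exc)

The trade-off is visible in the `except:` branch of the diff: a failure in any one import silently nulls all fourteen symbols, so callers must check for None (or a deferred RuntimeError, as sketched) rather than relying on an import-time crash.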