Skip to content

Commit bd6ad6b

Browse files
committed
feat(hardware): 实现 mlu 硬件相关的函数;改变编译方式按照以硬件名称命名的目录名区分是否需要编译
1 parent 535134b commit bd6ad6b

File tree

13 files changed

+167
-23
lines changed

13 files changed

+167
-23
lines changed

CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ message(STATUS "Project " ${PROJECT_NAME} " version " ${PROJECT_VERSION})
55
option(ABSL_PROPAGATE_CXX_STD "Abseil need this option" ON)
66
option(USE_CUDA "Support Nvidia GPU" OFF)
77
option(USE_KUNLUN "Support Baidu Kunlunxin" OFF)
8+
option(USE_BANG "Support Hanwuji MLU" OFF)
89

910
set(CMAKE_CXX_STANDARD 20)
1011
set(CMAKE_CXX_STANDARD_REQUIRED ON)
@@ -41,6 +42,10 @@ if(USE_KUNLUN)
4142
message(STATUS "KUNLUN_HOME: ${KUNLUN_HOME}")
4243
endif()
4344

45+
if (USE_BANG)
46+
add_compile_definitions(USE_BANG)
47+
endif()
48+
4449
add_compile_options(-march=native) # this will cause error in some machine
4550
add_compile_options(-mtune=native)
4651
add_compile_options(-Wall)

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
TYPE ?= Debug
44
CUDA ?= OFF
55
KUNLUN ?= OFF
6+
BANG ?= OFF
67

78
CMAKE_EXTRA =
89
# CMAKE_EXTRA += -DCMAKE_CXX_COMPILER=

src/02hardware/CMakeLists.txt

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,18 @@ cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
22
project(hardware VERSION 0.0.0 LANGUAGES CXX)
33
message(STATUS "Project " ${PROJECT_NAME} " version " ${PROJECT_VERSION})
44

5+
# Source files
6+
file(GLOB HARDWARE_SRC src/*.cc src/*.cpp src/devices/cpu/*.cc)
7+
58
if(USE_CUDA)
6-
file(GLOB_RECURSE HARDWARE_CUDA_SRC src/*.cu)
9+
file(GLOB_RECURSE HARDWARE_CUDA_SRC src/devices/nvidia/*.cu src/devices/nvidia/*.cc)
10+
endif()
11+
12+
if(USE_BANG)
13+
file(GLOB_RECURSE HARDWARE_BANG_SRC src/devices/mlu/*.cc)
714
endif()
815

9-
file(GLOB_RECURSE HARDWARE_SRC src/*.cc src/*.cpp)
10-
add_library(hardware STATIC ${HARDWARE_SRC} ${HARDWARE_CUDA_SRC})
16+
add_library(hardware STATIC ${HARDWARE_SRC} ${HARDWARE_CUDA_SRC} ${HARDWARE_BANG_SRC})
1117
target_link_libraries(hardware PUBLIC common)
1218
target_include_directories(hardware PUBLIC include)
1319

src/02hardware/include/hardware/device.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ namespace refactor::hardware {
1111
enum class Type : int32_t {
1212
Cpu,
1313
Nvidia,
14+
Mlu,
15+
Kunlun,
1416
};
1517

1618
protected:
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#ifndef HARDWARE_DEVICES_MLU_H
2+
#define HARDWARE_DEVICES_MLU_H
3+
4+
#include "../device.h"
5+
6+
namespace refactor::hardware {
7+
8+
class Mlu final : public Device {
9+
public:
10+
explicit Mlu(int32_t card);
11+
void setContext() const noexcept final;
12+
Type type() const noexcept final {
13+
return Type::Mlu;
14+
}
15+
};
16+
17+
}// namespace refactor::hardware
18+
19+
#endif// HARDWARE_DEVICES_MLU_H

src/02hardware/src/devices/cpu/memory.cc

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,19 @@
55
namespace refactor::hardware {
66
using M = CpuMemory;
77

8-
void *M::malloc(size_t size) noexcept {
8+
void *M::malloc(size_t size) {
99
return std::malloc(size);
1010
}
11-
void M::free(void *ptr) noexcept {
11+
void M::free(void *ptr) {
1212
std::free(ptr);
1313
}
14-
void *M::copyHD(void *dst, void const *src, size_t bytes) const noexcept {
14+
void *M::copyHD(void *dst, void const *src, size_t bytes) const {
1515
return std::memcpy(dst, src, bytes);
1616
}
17-
void *M::copyDH(void *dst, void const *src, size_t bytes) const noexcept {
17+
void *M::copyDH(void *dst, void const *src, size_t bytes) const {
1818
return std::memcpy(dst, src, bytes);
1919
}
20-
void *M::copyDD(void *dst, void const *src, size_t bytes) const noexcept {
20+
void *M::copyDD(void *dst, void const *src, size_t bytes) const {
2121
return std::memcpy(dst, src, bytes);
2222
}
2323

src/02hardware/src/devices/cpu/memory.hh

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@
66
namespace refactor::hardware {
77

88
class CpuMemory final : public Memory {
9-
void *malloc(size_t) noexcept final;
10-
void free(void *) noexcept final;
11-
void *copyHD(void *dst, void const *src, size_t bytes) const noexcept final;
12-
void *copyDH(void *dst, void const *src, size_t bytes) const noexcept final;
13-
void *copyDD(void *dst, void const *src, size_t bytes) const noexcept final;
9+
void *malloc(size_t) final;
10+
void free(void *) final;
11+
void *copyHD(void *dst, void const *src, size_t bytes) const final;
12+
void *copyDH(void *dst, void const *src, size_t bytes) const final;
13+
void *copyDD(void *dst, void const *src, size_t bytes) const final;
1414
};
1515

1616
}// namespace refactor::hardware
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#include "functions.cc"
2+
#include "hardware/devices/mlu.h"
3+
#include "hardware/mem_pool.h"
4+
#include "memory.hh"
5+
6+
namespace refactor::hardware {
7+
8+
/// Builds the memory pool used by MLU card `card`.
///
/// Asserts that `card` is a valid index, selects that card, reads its
/// free/total memory and wraps an `MluMemory` in a `MemPool` of
/// `min(free, max(5 GiB, 4/5 of total))` bytes.
static Arc<Memory> bangMemory(int32_t card) {
    ASSERT(0 <= card && card < getDeviceCount(), "Invalid card id: {}", card);
    setDevice(card);
    auto [free, total] = getMemInfo();
    // Target the larger of 5 GiB and 80% of the card, but never more than is free right now.
    auto size = std::min(free, std::max(5ul << 30, total * 4 / 5));
    // The message names the MLU backend so logs are not confused with the Nvidia one.
    fmt::println("initializing Cambricon MLU {}, memory {} / {}, alloc {}",
                 card, free, total, size);
    return std::make_shared<MemPool>(
        std::make_shared<MluMemory>(),
        size,
        256ul);// NOTE(review): presumably the pool's alignment in bytes — confirm against MemPool
}
20+
21+
/// Forwards `card` and the pool returned by `bangMemory(card)` to the `Device` base.
Mlu::Mlu(int32_t card)
    : Device(card, bangMemory(card)) {}
22+
23+
/// Passes this object's `_card` to `setDevice`.
// NOTE(review): this is `noexcept` while `setDevice` reports errors through
// BANG_ASSERT / RUNTIME_ERROR — confirm that terminating on failure is intended.
void Mlu::setContext() const noexcept { setDevice(_card); }
26+
27+
}// namespace refactor::hardware
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#include "functions.hh"
2+
3+
namespace refactor::hardware {
4+
5+
/// Returns the device count reported by `cnrtGetDeviceCount`.
/// A non-success status is reported through `BANG_ASSERT`.
int getDeviceCount() {
    // CNRT writes the count through an `unsigned int *`; start from zero rather
    // than an indeterminate value and convert to the declared return type explicitly.
    unsigned int deviceCount = 0;
    BANG_ASSERT(cnrtGetDeviceCount(&deviceCount));
    return static_cast<int>(deviceCount);
}
10+
/// Hands `device` to `cnrtSetDevice` and checks the returned status with `BANG_ASSERT`.
void setDevice(int device) { BANG_ASSERT(cnrtSetDevice(device)); }
13+
/// Returns the free and total memory values written by `cnrtMemGetInfo`.
/// A non-success status is reported through `BANG_ASSERT`.
MemInfo getMemInfo() {
    MemInfo memInfo{};
    // Query through CNRT: `cudaMemGetInfo` is a CUDA runtime call and its status
    // is not a CNRT return code, so it cannot be checked by BANG_ASSERT.
    BANG_ASSERT(cnrtMemGetInfo(&memInfo.free, &memInfo.total));
    return memInfo;
}
18+
19+
}// namespace refactor::hardware
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#ifndef HARDWARE_DEVICES_MLU_FUNCTIONS_CUH
2+
#define HARDWARE_DEVICES_MLU_FUNCTIONS_CUH
3+
4+
#include "common.h"
5+
6+
// Evaluates STATUS exactly once. If the result differs from CNRT_RET_SUCCESS,
// invokes RUNTIME_ERROR with a message containing the stringified expression,
// the text from cnrtGetErrorStr and the numeric status.
// The body is wrapped in do { } while (0) so the macro expands to one statement
// and cannot capture an `else` that follows it. Use as `BANG_ASSERT(expr);`.
#define BANG_ASSERT(STATUS) \
    do { \
        if (auto status = (STATUS); status != CNRT_RET_SUCCESS) { \
            RUNTIME_ERROR(fmt::format("bang failed on \"" #STATUS "\" with \"{}\" ({})", \
                                      cnrtGetErrorStr(status), static_cast<int>(status))); \
        } \
    } while (0)
11+
12+
namespace refactor::hardware {
13+
14+
/// Pair of memory sizes returned by `getMemInfo`.
/// Both members start at zero so a default-constructed value is never indeterminate.
struct MemInfo {
    size_t free{};
    size_t total{};
};
17+
18+
int getDeviceCount();
19+
void setDevice(int device);
20+
MemInfo getMemInfo();
21+
22+
}// namespace refactor::hardware
23+
24+
#endif// HARDWARE_DEVICES_MLU_FUNCTIONS_CUH

0 commit comments

Comments
 (0)