
Commit 1791d53

Merge pull request #56 from InfiniTensor/dev
Dev
2 parents 1600518 + cf88675 commit 1791d53

120 files changed: +2226 −1066 lines


CMakeLists.txt

Lines changed: 2 additions & 0 deletions

@@ -25,6 +25,8 @@ if(USE_CUDA)
     endif()
     message(STATUS "CMAKE_CUDA_HOST_COMPILER set to " ${CMAKE_CUDA_HOST_COMPILER})
     message(STATUS "CMAKE_CUDA_ARCHITECTURES set to " ${CMAKE_CUDA_ARCHITECTURES})
+    message(STATUS "CUDA_PATH set to " ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
+    add_compile_definitions(CUDA_INCLUDE_PATH="${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}")
 endif()

 if(USE_KUNLUN)
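The two added lines record the CUDA toolkit include directories and bake them into every translation unit as the string macro CUDA_INCLUDE_PATH. Below is a minimal standalone sketch of how such a compile definition can be consumed; the consumer is hypothetical and only illustrates the mechanism, the commit itself only defines the macro.

    #include <cstdio>

    // Normally injected by CMake via add_compile_definitions(CUDA_INCLUDE_PATH="...");
    // the fallback below only exists so this sketch compiles on its own.
    #ifndef CUDA_INCLUDE_PATH
    #define CUDA_INCLUDE_PATH "/usr/local/cuda/include"
    #endif

    int main() {
        // A typical consumer would forward this path, e.g. as an "-I" option
        // to a runtime compiler; here we just print what the build baked in.
        std::printf("CUDA headers expected at: %s\n", CUDA_INCLUDE_PATH);
        return 0;
    }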

src/00common/include/common/data_type.h

Lines changed: 1 addition & 0 deletions

@@ -45,6 +45,7 @@ namespace refactor {
         bool isFloat() const noexcept;
         bool isSignedLarge() const noexcept;
         bool isSigned() const noexcept;
+        bool isUnsigned() const noexcept;
         bool isNumberic() const noexcept;
         bool isCpuNumberic() const noexcept;
         bool isBool() const noexcept;

src/00common/include/common/natural.h

Lines changed: 2 additions & 2 deletions

@@ -7,7 +7,7 @@ namespace refactor {

     template<class t = size_t>
     class natural_t : public std::iterator<std::input_iterator_tag, t> {
-        size_t _i;
+        t _i;

     public:
         natural_t(t val) noexcept : _i(val) {}
@@ -33,7 +33,7 @@ namespace refactor {

     template<class t = size_t>
     class rev_natural_t : public std::iterator<std::input_iterator_tag, t> {
-        size_t _i;
+        t _i;

     public:
         rev_natural_t(t val) noexcept : _i(val - 1) {}
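The fix stores the counter as the iterator's value type t instead of always size_t, so an instantiation such as natural_t<int16_t> counts with the requested type rather than silently widening. A standalone sketch of the same idea, independent of the repo's natural_t:

    #include <cstddef>
    #include <cstdint>
    #include <iostream>

    // Counting iterator whose internal state has the same type as the values
    // it yields -- the point of the size_t -> t change in natural.h.
    template<class t = std::size_t>
    class natural {
        t _i;// previously size_t, which ignored the template argument

    public:
        explicit natural(t val) noexcept : _i(val) {}
        t operator*() const noexcept { return _i; }
        natural &operator++() noexcept { ++_i; return *this; }
        bool operator!=(natural const &rhs) const noexcept { return _i != rhs._i; }
    };

    int main() {
        for (natural<std::int16_t> it(0), end(5); it != end; ++it) {
            std::cout << *it << ' ';// 0 1 2 3 4, computed in int16_t throughout
        }
        std::cout << '\n';
        return 0;
    }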

src/00common/include/common/range.h

Lines changed: 1 addition & 1 deletion

@@ -16,7 +16,7 @@ namespace refactor {

         bool empty() const noexcept { return end_ == begin_; }
         size_t size() const noexcept { return end_ - begin_; }
-        t at(size_t i) const noexcept {
+        t at(size_t i) const {
             ASSERT(i < size(), "Index out of range");
             return operator[](i);
         }
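Dropping noexcept acknowledges that the bounds check may leave at() abnormally; whether the repo's ASSERT throws or aborts is not visible in this diff, so the sketch below simply assumes a throwing check to show why the guarantee has to go.

    #include <cstddef>
    #include <iostream>
    #include <stdexcept>

    // Sketch of a checked accessor: once the bounds check can throw,
    // the function must not be declared noexcept.
    template<class t>
    struct range_sketch {
        t begin_, end_;

        std::size_t size() const noexcept { return end_ - begin_; }
        t at(std::size_t i) const {// no noexcept: may throw on a bad index
            if (i >= size()) { throw std::out_of_range("Index out of range"); }
            return begin_ + static_cast<t>(i);
        }
    };

    int main() {
        range_sketch<int> r{0, 4};
        std::cout << r.at(2) << '\n';// 2; r.at(9) would throw std::out_of_range
        return 0;
    }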

src/00common/src/data_type.cc

Lines changed: 5 additions & 0 deletions

@@ -82,6 +82,11 @@ namespace refactor {
             DT::I8, DT::I16, DT::I32, DT::I64};
         return set.contains(internal);
     }
+    bool DT::isUnsigned() const noexcept {
+        static const std::unordered_set<Enum> set{
+            DT::U8, DT::U16, DT::U32, DT::U64};
+        return set.contains(internal);
+    }
     bool DT::isNumberic() const noexcept {
         static const std::unordered_set<Enum> set{
             DT::F32, DT::U8, DT::I8, DT::U16, DT::I16,
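isUnsigned follows the same pattern as the existing predicates: membership in a static lookup set of enum values. A standalone sketch of that pattern with an illustrative enum (not the repo's DataType):

    #include <iostream>
    #include <unordered_set>

    // Illustrative enum standing in for the repo's DataType::Enum.
    enum class DT { F32, U8, I8, U16, I16, U32, I32, U64, I64 };

    // Same shape as DT::isUnsigned: a static set of the unsigned members.
    bool isUnsigned(DT dt) {
        static const std::unordered_set<DT> set{DT::U8, DT::U16, DT::U32, DT::U64};
        return set.contains(dt);// contains() requires C++20, as in the repo
    }

    int main() {
        std::cout << std::boolalpha
                  << isUnsigned(DT::U16) << ' '  // true
                  << isUnsigned(DT::I32) << '\n';// false
        return 0;
    }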

src/01graph_topo/include/graph_topo/linked_graph.hpp

Lines changed: 8 additions & 8 deletions

@@ -37,7 +37,7 @@ namespace refactor::graph_topo {
         Rc<Node> pushNode(TN, std::vector<Rc<Edge>>);
         void eraseNode(count_t);
         void eraseNode(Rc<Node>);
-        size_t cleanup(bool useless(TE const &) = nullptr);
+        size_t cleanup(bool useful(TE const &) = nullptr);
         bool sort();
     };

@@ -86,6 +86,7 @@
             node.disconnect(i);
         }
         for (auto const &out : node._outputs) {
+            ASSERT(out->_targets.empty(), "Output edge should not have targets");
             out->_source = nullptr;
         }
     }
@@ -180,23 +181,22 @@
         _nodes.erase(it);
     }

-    LINKED_GRAPH_FN cleanup(bool useless(TE const &))->size_t {
+    LINKED_GRAPH_FN cleanup(bool useful(TE const &))->size_t {
         std::unordered_set<Edge *> outputs;
         outputs.reserve(_outputs.size());
         std::transform(_outputs.begin(), _outputs.end(), std::inserter(outputs, outputs.end()), [](auto const &e) { return e.get(); });
-        auto useful = [&](Rc<Edge> const &e) {
-            return !e->_targets.empty() ||      // some node still reads this edge
-                   outputs.contains(e.get()) || // this edge is a whole-graph output
-                   !useless ||                  // no further check is needed
-                   !useless(e->_info);          // this edge is useful for some other reason
+        auto useful_ = [&](Rc<Edge> const &e) {
+            return !e->_targets.empty() ||       // some node still reads this edge
+                   outputs.contains(e.get()) ||  // this edge is a whole-graph output
+                   (useful && useful(e->_info)); // this edge is useful for some other reason
         };

         auto before = _nodes.size();
         while (true) {
             auto endit = std::remove_if(
                 _nodes.begin(), _nodes.end(),
                 [&, this](auto &n) {
-                    auto useless_ = std::none_of(n->_outputs.begin(), n->_outputs.end(), useful);
+                    auto useless_ = std::none_of(n->_outputs.begin(), n->_outputs.end(), useful_);
                     if (useless_) { _cleanupNode(*n); }
                     return useless_;
                 });
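The predicate's meaning is inverted along with its name: as the old lambda reads, omitting the callback made every edge pass through the !useless branch, so the default cleanup never pruned dead-end edges; with the new lambda, an edge with no readers that is not a graph output survives only if a useful callback is supplied and returns true. A standalone sketch of the resulting keep/drop test:

    #include <iostream>
    #include <vector>

    // Standalone sketch of the cleanup test after the change: an edge is kept
    // if it still has readers, is a whole-graph output, or an optional
    // caller-supplied callback marks its payload as useful.
    struct EdgeSketch {
        int readers;       // how many nodes consume this edge
        bool isGraphOutput;// whether the edge is one of the graph's outputs
        int info;          // stand-in for the edge payload TE
    };

    bool keep(EdgeSketch const &e, bool (*useful)(int const &)) {
        return e.readers > 0 ||
               e.isGraphOutput ||
               (useful && useful(e.info));
    }

    int main() {
        auto oddIsUseful = [](int const &i) { return i % 2 != 0; };
        std::vector<EdgeSketch> edges{{0, false, 2}, {0, false, 3}, {0, true, 4}};
        for (auto const &e : edges) {
            std::cout << keep(e, oddIsUseful) << ' ';// 0 1 1
        }
        std::cout << '\n';
        return 0;
    }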

src/03runtime/include/runtime/stream.h

Lines changed: 3 additions & 0 deletions

@@ -24,6 +24,7 @@ namespace refactor::runtime {
     struct Edge {
         Arc<hardware::Device::Blob> blob;
         size_t stackOffset;
+        std::string name;
     };

     class Stream {
@@ -39,6 +40,8 @@ namespace refactor::runtime {
             std::vector<Node>,
             std::vector<Edge>,
             decltype(_device));
+
+        decltype(_graph) const &graph() const noexcept { return _graph; }
         void setData(count_t, void const *, size_t);
         void setData(count_t, Arc<hardware::Device::Blob>);
         bool getData(count_t, void *, size_t) const;
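The runtime now records a name on every stream edge and exposes the execution graph read-only through graph(). A minimal sketch of that accessor pattern, using a placeholder member instead of the repo's graph type, to show how decltype keeps the return type tied to the member declaration:

    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    // Placeholder for Stream: the accessor returns the member by const
    // reference, and decltype keeps its type in sync with the declaration.
    class StreamSketch {
        std::vector<std::string> _graph;// stand-in for the real graph type

    public:
        explicit StreamSketch(std::vector<std::string> g) : _graph(std::move(g)) {}

        decltype(_graph) const &graph() const noexcept { return _graph; }
    };

    int main() {
        StreamSketch s({"conv", "relu"});
        for (auto const &name : s.graph()) { std::cout << name << '\n'; }
        return 0;
    }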

src/04kernel/cuda/include/kernel/cuda/functions.cuh

Lines changed: 2 additions & 0 deletions

@@ -3,6 +3,8 @@

 namespace refactor::kernel::cuda {

+    int currentDevice();
+
     void sync();

     void copyOut(void *dst, const void *src, size_t size);
src/04kernel/cuda/include/kernel/cuda/scatter_nd.cuh (new file)

Lines changed: 21 additions & 0 deletions

@@ -0,0 +1,21 @@
+#ifndef KERNEL_CUDA_SCATTER_ND_CUH
+#define KERNEL_CUDA_SCATTER_ND_CUH
+
+#include "threads_distributer.cuh"
+
+namespace refactor::kernel::cuda {
+
+    void launchScatterND(
+        KernelLaunchParameters const &,
+        void const *data,
+        void const *indices,
+        void const *updates,
+        void *output,
+        unsigned int const *strides,
+        size_t rank,
+        unsigned int blockCount,
+        size_t blockSize);
+
+}// namespace refactor::kernel::cuda
+
+#endif// KERNEL_CUDA_SCATTER_ND_CUH
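The header only declares a launcher, so the layout conventions are an assumption here: typically for ScatterND each row of indices (length rank) addresses one block of blockSize elements, and strides converts that index tuple into a flat block offset. A CPU reference sketch of those semantics (not the CUDA kernel, and not guaranteed to match launchScatterND's conventions):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <iostream>
    #include <vector>

    // CPU reference for ScatterND semantics: each row of `indices` (length
    // `rank`) selects one block of `blockSize` elements in `output`, located
    // dot(index, strides) blocks from the start, and that block is
    // overwritten with the matching row of `updates`.
    void scatterNdReference(std::vector<float> &output,              // starts as a copy of `data`
                            std::vector<std::int64_t> const &indices,// blockCount x rank
                            std::vector<float> const &updates,       // blockCount x blockSize
                            std::vector<unsigned> const &strides,    // per-dimension strides, in blocks
                            std::size_t rank,
                            std::size_t blockCount,
                            std::size_t blockSize) {
        for (std::size_t b = 0; b < blockCount; ++b) {
            std::size_t offset = 0;
            for (std::size_t d = 0; d < rank; ++d) {
                offset += static_cast<std::size_t>(indices[b * rank + d]) * strides[d];
            }
            std::memcpy(output.data() + offset * blockSize,
                        updates.data() + b * blockSize,
                        blockSize * sizeof(float));
        }
    }

    int main() {
        // data shape (4, 2); indices shape (2, 1) scatter whole rows of 2 elements.
        std::vector<float> output(8, 0.f);
        std::vector<std::int64_t> indices{3, 1};
        std::vector<float> updates{9, 9, 7, 7};
        scatterNdReference(output, indices, updates, {1}, 1, 2, 2);
        for (auto v : output) { std::cout << v << ' '; }// 0 0 7 7 0 0 9 9
        std::cout << '\n';
        return 0;
    }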

src/04kernel/cuda/src/functions.cu

Lines changed: 9 additions & 6 deletions

@@ -1,19 +1,22 @@
 #include "kernel/cuda/functions.cuh"
+#include "macro.cuh"
 #include <cstdio>

 namespace refactor::kernel::cuda {

+    int currentDevice() {
+        int device;
+        CUDA_ASSERT(cudaGetDevice(&device));
+        return device;
+    }
+
     void sync() {
-        auto state = cudaDeviceSynchronize();
-        if (state != cudaSuccess) {
-            printf("cudaDeviceSynchronize failed: %s\n", cudaGetErrorString(state));
-            exit(1);
-        }
+        CUDA_ASSERT(cudaDeviceSynchronize());
     }

     void copyOut(void *dst, const void *src, size_t size) {
         sync();
-        cudaMemcpy(dst, src, size, cudaMemcpyDeviceToHost);
+        CUDA_ASSERT(cudaMemcpy(dst, src, size, cudaMemcpyDeviceToHost));
     }

 }// namespace refactor::kernel::cuda
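The hand-rolled status checks are replaced by a CUDA_ASSERT macro pulled in from the new macro.cuh, whose definition is not part of this diff. A plausible sketch of such a macro, assuming the usual check-report-abort shape; the repo's actual definition may differ:

    #include <cstdio>
    #include <cstdlib>
    #include <cuda_runtime.h>

    // Hypothetical CUDA_ASSERT along the lines this commit relies on
    // (macro.cuh is not shown in the diff): evaluate a CUDA runtime call,
    // and on failure print the error string with its location and abort.
    #define CUDA_ASSERT(CALL)                                          \
        do {                                                           \
            auto status_ = (CALL);                                     \
            if (status_ != cudaSuccess) {                              \
                std::fprintf(stderr, "%s:%d: %s failed: %s\n",         \
                             __FILE__, __LINE__, #CALL,                \
                             cudaGetErrorString(status_));             \
                std::abort();                                          \
            }                                                          \
        } while (0)

    int main() {
        int device;
        CUDA_ASSERT(cudaGetDevice(&device));// same pattern as currentDevice()
        std::printf("current device: %d\n", device);
        return 0;
    }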
