From 2731b2d18c124c7b13f06e03ffb573493ad418ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?xzhInfini=E8=AE=AD=E7=BB=83=E8=90=A5?= <365110614@qq.com>
Date: Fri, 16 Jan 2026 20:06:28 +0800
Subject: [PATCH 1/2] =?UTF-8?q?=E5=AE=8C=E6=88=90=E4=BA=86=E4=BD=9C?=
 =?UTF-8?q?=E4=B8=9A1=E5=88=B0=E4=BD=9C=E4=B8=9A7?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/core/allocator.h    |  1 +
 src/core/allocator.cc       | 65 ++++++++++++++++++++++++++++++++-----
 src/core/graph.cc           | 48 +++++++++++++++++++++++++--
 src/operators/concat.cc     |  9 +++++
 src/operators/matmul.cc     | 37 +++++++++++++++++++--
 src/operators/transpose.cc  | 15 ++++++++-
 src/operators/unary.cc      | 14 ++++++--
 src/utils/operator_utils.cc | 24 ++++++++++++--
 8 files changed, 194 insertions(+), 19 deletions(-)

diff --git a/include/core/allocator.h b/include/core/allocator.h
index 002601d..7df475c 100644
--- a/include/core/allocator.h
+++ b/include/core/allocator.h
@@ -27,6 +27,7 @@ namespace infini
     // TODO:可能需要设计一个数据结构来存储free block,以便于管理和合并
     // HINT: 可以使用一个 map 来存储 free block,key 为 block 的起始/结尾地址,value 为 block 的大小
     // =================================== 作业 ===================================
+    std::map<size_t, size_t> free_blocks; // key: block start offset, value: block size

   public:
     Allocator(Runtime runtime);

diff --git a/src/core/allocator.cc b/src/core/allocator.cc
index ff593ae..b38f944 100644
--- a/src/core/allocator.cc
+++ b/src/core/allocator.cc
@@ -28,23 +28,70 @@ namespace infini
         IT_ASSERT(this->ptr == nullptr);
         // pad the size to the multiple of alignment
         size = this->getAlignedSize(size);
-        // =================================== 作业 ===================================
         // TODO: 设计一个算法来分配内存,返回起始地址偏移量
         // =================================== 作业 ===================================
-        return 0;
+        // Strategy 1: first fit. Drawback: it can fragment memory badly.
+        // Free blocks live in a std::map, keyed by start offset, value = block size.
+        // Walk the map and take the first block that is at least `size` bytes,
+        // removing it from the map and re-inserting any leftover tail.
+        // If no block fits, grow `peak` instead.
+        // `used` is increased in every case.
+        for (auto it = free_blocks.begin(); it != free_blocks.end(); it++) {
+            size_t block_addr = it->first;
+            size_t block_size = it->second;
+            if (block_size >= size) {
+                this->used += size;
+                free_blocks.erase(it);
+                if (block_size > size) {
+                    free_blocks[block_addr + size] = block_size - size;
+                }
+                return block_addr;
+            } else if (block_addr + block_size == this->peak) {
+                this->used += size;
+                size_t needed_extra = size - block_size; // how much more has to come from the pool
+                this->peak += needed_extra;              // push up peak (the high-water mark)
+                free_blocks.erase(it);                   // consume this trailing block
+                return block_addr;
+            }
+        }
+        // No suitable block found: allocate at the end of the pool.
+        size_t block_addr = this->peak;
+        this->peak += size;
+        this->used += size;
+        return block_addr;
     }
 
     void Allocator::free(size_t addr, size_t size)
     {
         IT_ASSERT(this->ptr == nullptr);
         size = getAlignedSize(size);
         // =================================== 作业 ===================================
         // TODO: 设计一个算法来回收内存
         // =================================== 作业 ===================================
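The allocator above is a first-fit free list with neighbor coalescing. Below is a minimal, self-contained sketch of the same scheme (editorial illustration, not part of the patch; MiniPool and every name in it are made up), with a tiny trace in main() where two adjacent freed blocks merge back into one:

#include <cassert>
#include <cstddef>
#include <iterator>
#include <map>

struct MiniPool {
    std::map<size_t, size_t> free_blocks; // start offset -> block size
    size_t peak = 0;                      // high-water mark of the pool

    size_t alloc(size_t size) {
        for (auto it = free_blocks.begin(); it != free_blocks.end(); ++it) {
            if (it->second >= size) {            // first fit
                size_t addr = it->first, rest = it->second - size;
                free_blocks.erase(it);
                if (rest > 0)
                    free_blocks[addr + size] = rest; // keep the unused tail
                return addr;
            }
        }
        size_t addr = peak;                      // nothing fits: grow the pool
        peak += size;
        return addr;
    }

    void free(size_t addr, size_t size) {
        auto it = free_blocks.emplace(addr, size).first;
        auto next = std::next(it);               // merge with the next block
        if (next != free_blocks.end() && it->first + it->second == next->first) {
            it->second += next->second;
            free_blocks.erase(next);
        }
        if (it != free_blocks.begin()) {         // merge with the previous block
            auto prev = std::prev(it);
            if (prev->first + prev->second == it->first) {
                prev->second += it->second;
                free_blocks.erase(it);
            }
        }
    }
};

int main() {
    MiniPool p;
    size_t a = p.alloc(64), b = p.alloc(32);
    p.free(a, 64);
    p.free(b, 32); // coalesces with [0, 64) into one 96-byte block
    assert(p.free_blocks.size() == 1 && p.free_blocks[0] == 96);
}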
+        this->used -= size;
+        free_blocks[addr] = size;
+        auto it = free_blocks.find(addr);
+        auto next_it = std::next(it);
+        // Merge with the following block if the two are adjacent.
+        if (next_it != free_blocks.end()) {
+            if (it->first + it->second == next_it->first) {
+                it->second += next_it->second;
+                free_blocks.erase(next_it);
+            }
+        }
+        // Merge with the preceding block if the two are adjacent.
+        if (it != free_blocks.begin()) {
+            auto prev_it = std::prev(it);
+            if (prev_it->first + prev_it->second == it->first) {
+                prev_it->second += it->second;
+                free_blocks.erase(it);
+            }
+        }
     }
 
     void *Allocator::getPtr()
     {

diff --git a/src/core/graph.cc b/src/core/graph.cc
index 3a90637..717aad3 100644
--- a/src/core/graph.cc
+++ b/src/core/graph.cc
@@ -147,12 +147,56 @@ namespace infini
     {
         // topological sorting first
         IT_ASSERT(topo_sort() == true);
-        // =================================== 作业 ===================================
         // TODO:利用 allocator 给计算图分配内存
         // HINT: 获取分配好的内存指针后,可以调用 tensor 的 setDataBlob 函数给 tensor 绑定内存
         // =================================== 作业 ===================================
-
+        std::unordered_map<int, size_t> ref_counts; // per-tensor reference count; free as soon as it reaches 0
+        for (auto &tensor : this->tensors) {
+            ref_counts[tensor->getFuid()] = tensor->getTargets().size();
+        }
+        // Bump graph outputs by one so they are kept alive for the caller.
+        for (auto &tensor : this->getOutputs()) {
+            ref_counts[tensor->getFuid()]++; // artificially +1 so it is never recycled
+        }
+        std::unordered_map<int, size_t> offsets;
+        // Now walk the ops and simulate the allocations:
+        for (auto &op : this->ops) {
+            // Allocate space for every output of this op.
+            for (auto &tensor : op->getOutputs()) {
+                size_t size = tensor->getBytes();      // tensor size in bytes
+                size_t offset = allocator.alloc(size); // reserve the space
+                offsets[tensor->getFuid()] = offset;   // remember its offset
+            }
+            // Check the inputs: the last consumer may release them.
+            for (auto &tensor : op->getInputs()) {
+                int fuid = tensor->getFuid();
+                ref_counts[fuid]--;
+                if (ref_counts[fuid] == 0) {
+                    // Only intermediate results are allocated here; external graph
+                    // inputs never received an offset, so check before freeing.
+                    if (offsets.find(fuid) != offsets.end()) {
+                        allocator.free(offsets[fuid], tensor->getBytes());
+                    }
+                }
+            }
+        }
+        // Every planned fuid now has an offset; bind the data blobs.
+        void *basePtr = allocator.getPtr();
+        for (auto &tensor : this->tensors) {
+            int fuid = tensor->getFuid();
+            // Skip tensors that never got an offset (external inputs or unused tensors).
+            if (offsets.find(fuid) != offsets.end()) {
+                size_t offset = offsets[fuid];
+                // void* has no pointer arithmetic, so cast to char* first.
+                void *ptr = static_cast<char *>(basePtr) + offset;
+                // Create the blob: BlobObj(Runtime runtime, void *ptr).
+                auto blob = make_ref<BlobObj>(this->runtime, ptr);
+                tensor->setDataBlob(blob);
+            }
+        }
         allocator.info();
     }
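dataMalloc above plans memory by walking the ops in topological order, allocating each output and releasing each input right after its last consumer. A small standalone sketch of that reference-counting idea (ToyOp and all names are hypothetical, not the repo API) shows why the peak footprint stays below the sum of all tensor sizes:

#include <algorithm>
#include <cstdio>
#include <vector>

struct ToyOp { int out; std::vector<int> ins; }; // tensor ids only

int main() {
    // op0 -> t0, op1(t0) -> t1, op2(t1) -> t2 (t2 is the graph output)
    std::vector<ToyOp> ops = {{0, {}}, {1, {0}}, {2, {1}}};
    std::vector<int> refs = {1, 1, 1};          // t2 gets +1 as a graph output
    std::vector<size_t> bytes = {256, 256, 256};

    size_t live = 0, peak = 0;
    for (auto &op : ops) {
        live += bytes[op.out];                   // allocate the op's output
        peak = std::max(peak, live);
        for (int t : op.ins)
            if (--refs[t] == 0) live -= bytes[t]; // last use: release it
    }
    printf("peak bytes = %zu (vs %zu with no reuse)\n", peak, (size_t)768);
}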
diff --git a/src/operators/concat.cc b/src/operators/concat.cc
index d196330..eb0949e 100644
--- a/src/operators/concat.cc
+++ b/src/operators/concat.cc
@@ -17,6 +17,15 @@ optional<vector<Shape>> ConcatObj::inferShape(const TensorVec &inputs) {
     // TODO:修改 dims,返回正确的 concat 后的 shape
     // REF: https://onnx.ai/onnx/operators/onnx__Concat.html#concat-13
     // =================================== 作业 ===================================
+    // Concatenate along axis `dim`; every other dimension stays the same.
+    int p_dim = this->dim;
+    if (p_dim < 0) {
+        p_dim += rank;
+    }
+
+    for (size_t i = 1; i < inputs.size(); ++i) {
+        dims[p_dim] += inputs[i]->getDims()[p_dim];
+    }
     return {{dims}};
 }
 
diff --git a/src/operators/matmul.cc b/src/operators/matmul.cc
index 7a16ca2..21550be 100644
--- a/src/operators/matmul.cc
+++ b/src/operators/matmul.cc
@@ -1,5 +1,5 @@
 #include "operators/matmul.h"
-
+#include "utils/operator_utils.h"
 
 namespace infini
 {
@@ -27,7 +27,40 @@ namespace infini
         // TODO:返回经过 matmul 操作后的 shape
        // REF: https://github.com/onnx/onnx/blob/main/docs/Operators.md#gemm
         // =================================== 作业 ===================================
-        return std::nullopt;
+        // Batched matrix multiplication: the last two dimensions are the matrix
+        // dimensions; everything in front of them is a batch that must broadcast.
+        const auto A = inputs[0];
+        const auto B = inputs[1];
+        auto shapeA = A->getDims();
+        auto shapeB = B->getDims();
+
+        int rankA = shapeA.size();
+        int rankB = shapeB.size();
+        // Apply transA / transB by swapping the last two dimensions.
+        if (this->transA && rankA >= 2) {
+            std::swap(shapeA[rankA - 1], shapeA[rankA - 2]);
+        }
+        if (this->transB && rankB >= 2) {
+            std::swap(shapeB[rankB - 1], shapeB[rankB - 2]);
+        }
+        // In general: (M, K) * (K, N) = (M, N).
+        int M = shapeA[rankA - 2];
+        int K_A = shapeA[rankA - 1];
+        int K_B = shapeB[rankB - 2];
+        int N = shapeB[rankB - 1];
+        // The inner dimensions must match.
+        IT_ASSERT(K_A == K_B);
+        // The remaining (batch) dimensions are broadcast against each other.
+        Shape batchA(shapeA.begin(), shapeA.end() - 2);
+        Shape batchB(shapeB.begin(), shapeB.end() - 2);
+
+        Shape batchOut = infer_broadcast(batchA, batchB);
+
+        Shape outputShape = batchOut;
+        outputShape.push_back(M);
+        outputShape.push_back(N);
+
+        return vector<Shape>{outputShape};
     }
 
 } // namespace infini
\ No newline at end of file
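MatmulObj::inferShape treats the last two axes as the matrix dimensions and leaves the batch axes to infer_broadcast. A standalone sketch (made-up variable names, not the repo classes) of how transA/transB decide which axes play M, K and N:

#include <cassert>
#include <cstdio>
#include <utility>
#include <vector>

int main() {
    std::vector<int> shapeA = {8, 4, 3}; // stored as [batch, K, M]
    std::vector<int> shapeB = {8, 4, 6}; // stored as [batch, K, N]
    bool transA = true, transB = false;

    if (transA) std::swap(shapeA[1], shapeA[2]); // logical A becomes [8, 3, 4]
    if (transB) std::swap(shapeB[1], shapeB[2]);

    int M = shapeA[1], K_A = shapeA[2];
    int K_B = shapeB[1], N = shapeB[2];
    assert(K_A == K_B); // the inner dimensions must agree

    printf("output shape: [%d, %d, %d]\n", shapeA[0], M, N); // [8, 3, 6]
}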
diff --git a/src/operators/transpose.cc b/src/operators/transpose.cc
index faab2b6..888c1ee 100644
--- a/src/operators/transpose.cc
+++ b/src/operators/transpose.cc
@@ -34,7 +34,20 @@ namespace infini
         // REF: https://onnx.ai/onnx/operators/onnx__Transpose.html#transpose-21
         // =================================== 作业 ===================================
-        return std::nullopt;
+        auto &perm = this->transposePermute;
+
+        if (perm.empty()) { // an empty perm means "reverse all axes" (the ONNX default)
+            for (int i = 0; i < rank; ++i) {
+                output_dim[i] = input_dim[rank - 1 - i];
+            }
+        } else {
+            for (int i = 0; i < rank; ++i) {
+                output_dim[i] = input_dim[perm[i]];
+            }
+        }
+
+        // Return the result.
+        return vector<Shape>{output_dim};
     }
 
     std::string TransposeObj::toString() const

diff --git a/src/operators/unary.cc b/src/operators/unary.cc
index 3daad36..ce3e9aa 100644
--- a/src/operators/unary.cc
+++ b/src/operators/unary.cc
@@ -39,7 +39,12 @@ namespace infini
         // TODO:返回经过 clip 操作后的 shape
         // REF: https://onnx.ai/onnx/operators/onnx__Clip.html#clip-13
         // =================================== 作业 ===================================
-        return std::nullopt;
+        // Clip does not change the shape.
+        const auto &input_tensor = inputs[0];
+
+        auto input_dim = input_tensor->getDims();
+
+        return vector<Shape>{input_dim};
     }
 
     std::string ClipObj::toString() const
@@ -66,7 +71,7 @@ namespace infini
         // REF_FILE: src/core/operator.cc
         // REF: https://onnx.ai/onnx/operators/onnx__Cast.html#cast-21
         // =================================== 作业 ===================================
-        return {};
+        return vector<DataType>{this->getOutputDataType()};
     }
 
     optional<vector<Shape>> CastObj::inferShape(const TensorVec &inputs)
@@ -75,7 +80,10 @@ namespace infini
         // TODO:返回经过 cast 操作后的 shape
         // REF: https://onnx.ai/onnx/operators/onnx__Cast.html#cast-21
         // =================================== 作业 ===================================
-        return std::nullopt;
+        // Cast does not change the shape either.
+        const auto &input_tensor = inputs[0];
+        auto input_dim = input_tensor->getDims();
+        return vector<Shape>{input_dim};
     }
 
     std::string CastObj::toString() const
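TransposeObj::inferShape simply permutes the input dimensions. A standalone sketch (not the repo API) of how a permutation maps axes, with the empty-perm fallback noted in a comment:

#include <cstdio>
#include <vector>

int main() {
    std::vector<int> in = {2, 3, 4, 5};
    std::vector<int> perm = {0, 1, 3, 2};  // swap the last two axes
    std::vector<int> out(in.size());
    for (size_t i = 0; i < in.size(); ++i)
        out[i] = in[perm[i]];              // output axis i comes from input axis perm[i]
    for (int d : out) printf("%d ", d);    // prints: 2 3 5 4
    printf("\n");
    // An empty perm falls back to reversing all axes, giving {5, 4, 3, 2}.
}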
diff --git a/src/utils/operator_utils.cc b/src/utils/operator_utils.cc
index edbd2c8..1ced1ce 100644
--- a/src/utils/operator_utils.cc
+++ b/src/utils/operator_utils.cc
@@ -9,8 +9,28 @@ Shape infer_broadcast(const Shape &A, const Shape &B) {
     // TODO:对 A 和 B 进行双向广播,返回广播后的形状。
     // REF: https://github.com/onnx/onnx/blob/main/docs/Broadcasting.md
     // =================================== 作业 ===================================
-
-    return {};
+    // Align the shapes on the right and take the larger extent of each pair.
+    int rankA = A.size();
+    int rankB = B.size();
+    int max_rank = std::max(rankA, rankB);
+    Shape res_shape(max_rank);
+
+    for (int k = max_rank - 1, i = rankA - 1, j = rankB - 1; k >= 0; i--, j--, k--) {
+        int dimA = (i >= 0) ? A[i] : 1;
+        int dimB = (j >= 0) ? B[j] : 1;
+
+        if (dimA == dimB) {
+            res_shape[k] = dimA;
+        } else if (dimA == 1) {
+            res_shape[k] = dimB;
+        } else if (dimB == 1) {
+            res_shape[k] = dimA;
+        } else {
+            // The shapes are incompatible.
+            IT_ASSERT(false, "Broadcast shape mismatch");
+        }
+    }
+    return res_shape;
 }
 
 int get_real_axis(const int &axis, const int &rank) {
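infer_broadcast right-aligns the two shapes and pads the shorter one with 1s. A standalone helper with the same rule (illustration only, not the repo function, and it assumes the shapes are already compatible), applied to a few shape pairs:

#include <algorithm>
#include <cstdio>
#include <vector>

static std::vector<int> bcast(std::vector<int> a, std::vector<int> b) {
    std::vector<int> out(std::max(a.size(), b.size()));
    for (int k = 1; k <= (int)out.size(); ++k) {
        int da = k <= (int)a.size() ? a[a.size() - k] : 1; // missing leading
        int db = k <= (int)b.size() ? b[b.size() - k] : 1; // axes count as 1
        out[out.size() - k] = (da == 1) ? db : da;         // assumes da==db or one is 1
    }
    return out;
}

int main() {
    for (int d : bcast({2, 3, 4, 5}, {})) printf("%d ", d);        // 2 3 4 5
    printf("\n");
    for (int d : bcast({4, 5}, {2, 3, 4, 5})) printf("%d ", d);    // 2 3 4 5
    printf("\n");
    for (int d : bcast({1, 4, 5}, {2, 3, 1, 1})) printf("%d ", d); // 2 3 4 5
    printf("\n");
}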
From e9f8a30e8fcb337b29226824679bfaed0745fa88 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?xzhInfini=E8=AE=AD=E7=BB=83=E8=90=A5?= <365110614@qq.com>
Date: Mon, 19 Jan 2026 19:10:27 +0800
Subject: [PATCH 2/2] =?UTF-8?q?=E5=AE=8C=E6=88=90?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/core/graph.h                          |   3 +-
 src/core/graph.cc                             | 113 +++++++++++++++++-
 test/core/test_graph.cc                       |  16 +++
 .../nativecpu/test_nativecpu_concat.cc        |   5 +-
 4 files changed, 134 insertions(+), 3 deletions(-)

diff --git a/include/core/graph.h b/include/core/graph.h
index c45580c..d8c8e7b 100644
--- a/include/core/graph.h
+++ b/include/core/graph.h
@@ -13,7 +13,8 @@ namespace infini
   protected:
     Runtime runtime;
     TensorVec tensors;
-    OpVec ops;
+    OpVec ops; // using OpVec = vector<Operator>;
+    // i.e. vector<Operator> ops;
     Allocator allocator;
 
   public:

diff --git a/src/core/graph.cc b/src/core/graph.cc
index 717aad3..c3810ac 100644
--- a/src/core/graph.cc
+++ b/src/core/graph.cc
@@ -2,6 +2,8 @@
 #include <algorithm>
 #include <numeric>
 #include <queue>
+#include "operators/transpose.h"
+#include "operators/matmul.h"
 
 namespace infini
 {
@@ -42,7 +44,7 @@ namespace infini
         oss << "Graph Tensors:\n";
         for (const auto &tensor : tensors)
             oss << tensor << "\n";
-
+        printf("done printing tensors\n");
         oss << "Graph operators:\n";
         for (const auto &op : ops)
         {
@@ -56,6 +58,8 @@ namespace infini
             oss << ", succ " << vecToString(succs);
             oss << ", " << op << "\n";
         }
+        printf("done printing operators\n");
+
         return oss.str();
     }
@@ -106,6 +110,105 @@ namespace infini
         // 1. 去除冗余的算子(例如,两个相邻的算子都是 transpose 算子,且做的是相反的操作,可以将其全部删除)
         // 2. 合并算子(例如,矩阵乘算子中含有属性transA、transB,如果其输入存在transpose,且对最后两个维度做交换,就可以将transpose融入到矩阵乘算子的属性中去)
         // =================================== 作业 ===================================
+        bool changed = true;
+        while (changed) {
+            changed = false;
+            auto ops = this->ops;
+            for (auto op : ops) {
+                // Rule 1: two consecutive transposes that undo each other.
+                if (op->getOpType() == OpType::Transpose) {
+                    auto trans1 = as<TransposeObj>(op);
+                    auto tensorA = trans1->getInputs()[0]; // original input, must be kept
+                    auto tensorB = trans1->getOutput();    // intermediate, can be dropped
+                    auto prevop = tensorA->getSource();    // producer of A, may be null
+                    // Only rewrite when the intermediate has exactly one consumer.
+                    if (tensorB->getTargets().size() == 1) {
+                        auto nextOp = tensorB->getTargets()[0];
+                        if (nextOp->getOpType() == OpType::Transpose) { // the consumer is also a transpose
+                            auto trans2 = as<TransposeObj>(nextOp);
+                            auto tensorC = trans2->getOutput(); // final output, can be dropped
+                            if (trans1->getPermute() == trans2->getPermute()) {
+                                auto targets = tensorC->getTargets();
+                                // Redirect every consumer of C to read A instead,
+                                // and rewire its predecessor to prevop.
+                                for (auto target : targets) {
+                                    target->removePredecessors(trans2);
+                                    if (prevop) {
+                                        prevop->addSuccessors(target);
+                                        target->addPredecessors(prevop); // only if A has a producer
+                                    }
+                                    target->replaceInput(tensorC, tensorA);
+                                    tensorA->addTarget(target);
+                                }
+                                if (prevop) {
+                                    prevop->removeSuccessors(trans1);
+                                }
+                                tensorA->removeTarget(trans1);
+                                this->removeOperator(trans1);
+                                this->removeOperator(trans2);
+                                this->removeTensor(tensorB);
+                                this->removeTensor(tensorC);
+                                // printf("rule 1: removed a transpose pair\n");
+                                changed = true;
+                                break;
+                            }
+                        }
+                    }
+                }
+                // Rule 2: fold a transpose of the last two axes into matmul's transA/transB.
+                if (op->getOpType() == OpType::MatMul) {
+                    auto matmul = as<MatmulObj>(op);
+                    auto inputs = matmul->getInputs();
+                    for (size_t i = 0; i < 2; ++i) {
+                        auto tensorB = inputs[i];
+                        auto prevOp = tensorB->getSource();
+                        if (prevOp && prevOp->getOpType() == OpType::Transpose) {
+                            auto trans = as<TransposeObj>(prevOp);
+                            auto tensorA = trans->getInputs()[0];
+                            auto perm = trans->getPermute();
+
+                            // The transpose may only swap the last two axes.
+                            int rank = perm.size();
+                            bool isSwapLastTwo = true;
+                            if (rank < 2) isSwapLastTwo = false;
+                            else {
+                                if (perm[rank - 1] != rank - 2 || perm[rank - 2] != rank - 1) isSwapLastTwo = false;
+                                for (int k = 0; k < rank - 2; ++k) {
+                                    if (perm[k] != k) { isSwapLastTwo = false; break; }
+                                }
+                            }
+                            if (isSwapLastTwo) {
+                                if (i == 0) matmul->setTransA(!matmul->getTransA());
+                                else matmul->setTransB(!matmul->getTransB());
+
+                                matmul->replaceInput(tensorB, tensorA);
+                                auto sourceA = tensorA->getSource();
+                                if (sourceA) {
+                                    sourceA->addSuccessors(matmul);
+                                    matmul->addPredecessors(sourceA);
+                                }
+                                matmul->removePredecessors(trans);
+                                trans->removeSuccessors(matmul);
+                                tensorA->addTarget(matmul);
+                                tensorB->removeTarget(matmul);
+                                // If nobody else reads the transposed tensor, drop the transpose itself.
+                                if (tensorB->getTargets().empty()) {
+                                    if (sourceA) {
+                                        trans->removePredecessors(sourceA);
+                                        sourceA->removeSuccessors(trans);
+                                    }
+                                    tensorA->removeTarget(trans);
+                                    this->removeOperator(trans);
+                                    this->removeTensor(tensorB);
+                                }
+                                // printf("rule 2: folded transpose into matmul\n");
+                                changed = true;
+                            }
+                        }
+                    }
+                    if (changed) break;
+                }
+            }
+        }
+        // printf("optimization loop finished\n");
     }
 
     Tensor GraphObj::getTensor(int fuid) const
@@ -160,6 +263,14 @@ namespace infini
             ref_counts[tensor->getFuid()]++; // artificially +1 so it is never recycled
         }
         std::unordered_map<int, size_t> offsets;
+        // Graph inputs (tensors with no source op) are not produced by any op,
+        // so the loop below would never allocate them; reserve their space up front.
+        for (auto &tensor : this->tensors) {
+            if (!tensor->getSource()) { // no producer: this tensor is a graph input
+                size_t size = tensor->getBytes();
+                size_t offset = allocator.alloc(size);
+                offsets[tensor->getFuid()] = offset;
+            }
+        }
+
         // Now walk the ops and simulate the allocations:
         for (auto &op : this->ops) {
             // Allocate space for every output of this op.
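The matmul fusion above only fires when the upstream transpose swaps exactly the last two axes. A standalone copy of that predicate (illustration only, outside the repo) with a few probe permutations:

#include <cstdio>
#include <vector>

static bool swapsOnlyLastTwo(const std::vector<int> &perm) {
    int rank = perm.size();
    if (rank < 2) return false;
    if (perm[rank - 1] != rank - 2 || perm[rank - 2] != rank - 1) return false;
    for (int k = 0; k < rank - 2; ++k)
        if (perm[k] != k) return false; // the leading (batch) axes must stay in place
    return true;
}

int main() {
    printf("%d\n", swapsOnlyLastTwo({0, 1, 3, 2})); // 1: foldable into transA/transB
    printf("%d\n", swapsOnlyLastTwo({0, 2, 1, 3})); // 0: touches a batch axis
    printf("%d\n", swapsOnlyLastTwo({2, 1, 0}));    // 0: permutes axis 0 as well
}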
diff --git a/test/core/test_graph.cc b/test/core/test_graph.cc
index bf696dd..35df6dd 100644
--- a/test/core/test_graph.cc
+++ b/test/core/test_graph.cc
@@ -28,13 +28,29 @@ namespace infini
         // 优化后
         g->print();
         EXPECT_EQ(g->getOperators().size(), 1);
+        // printf("passed 1");
         EXPECT_EQ(g->getTensors().size(), 3);
+        // printf("passed 2");
         EXPECT_EQ(g->getOperators()[0]->getOpType().underlying(), 7);
+        // printf("passed 3");
+        auto op = as<MatmulObj>(g->getOperators()[0]);
+        // printf("passed 4");
+        EXPECT_EQ(op->getInputs(0)->getGuid(), 2);
+        // printf("passed 5");
+        EXPECT_EQ(op->getInputs(1)->getGuid(), 3);
+        // printf("passed 6");
+        EXPECT_EQ(op->getOutputs()[0], o);
+        // printf("passed 7");
+        EXPECT_EQ(op->getTransA(), false);
+        // printf("passed 8");
+        EXPECT_EQ(op->getTransB(), true);
+        // printf("passed 9");
+
     }
 }
\ No newline at end of file

diff --git a/test/kernels/nativecpu/test_nativecpu_concat.cc b/test/kernels/nativecpu/test_nativecpu_concat.cc
index fc87fb1..ee724c3 100644
--- a/test/kernels/nativecpu/test_nativecpu_concat.cc
+++ b/test/kernels/nativecpu/test_nativecpu_concat.cc
@@ -15,10 +15,13 @@ TEST(Concat, NativeCpu) {
     auto t3 = g->addTensor({2, 2, 2, 1}, DataType::Float32);
     auto op = g->addOp<ConcatObj>(TensorVec{t1, t2, t3}, nullptr, 2);
     g->dataMalloc();
+    printf("memory allocation done\n");
     t1->setData(IncrementalGenerator());
+    printf("1\n");
     t2->setData(OneGenerator());
+    printf("2\n");
     t3->setData(OneGenerator());
-
+    printf("data initialization done\n");
     runtime->run(g);
     EXPECT_TRUE(op->getOutput()->equalData(
         vector<float>{0, 1, 2, 1, 1, 1, 3, 4, 5, 1, 1, 1,