From 3b093b6d1109998541a8f88f9ad2d5e15dcd14b2 Mon Sep 17 00:00:00 2001 From: ywcb00 Date: Thu, 30 Apr 2026 09:43:29 +0200 Subject: [PATCH 1/6] feat(EstimatorRowWise.java): implement the first version of the row wise sparsity estimator works for the matrix multiplication and bind test cases for now --- .../sysds/hops/estim/EstimatorRowWise.java | 203 ++++++++++++++++++ 1 file changed, 203 insertions(+) create mode 100644 src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java diff --git a/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java b/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java new file mode 100644 index 00000000000..4dc2ef416af --- /dev/null +++ b/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.hops.estim; + +import org.apache.commons.lang3.ArrayUtils; +import org.apache.commons.lang3.NotImplementedException; +import org.apache.sysds.runtime.DMLRuntimeException; +import org.apache.sysds.runtime.data.SparseRow; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.apache.sysds.runtime.meta.DataCharacteristics; +import org.apache.sysds.runtime.meta.MatrixCharacteristics; + +import java.util.stream.DoubleStream; +import java.util.stream.IntStream; + +/** + * This estimator implements an approach based on row-wise sparsity estimation, + * introduced in + * Lin, Chunxu, Wensheng Luo, Yixiang Fang, Chenhao Ma, Xilin Liu and Yuchi Ma: + * On Efficient Large Sparse Matrix Chain Multiplication. + * Proceedings of the ACM on Management of Data 2 (2024): 1 - 27. + */ +public class EstimatorRowWise extends SparsityEstimator { + @Override + public DataCharacteristics estim(MMNode root) { + double[] rsOut = estimInternMMChain(root); + double sparsity = DoubleStream.of(rsOut).average().orElse(0); + + MatrixCharacteristics matrixCharacteristics = getMatrixCharacteristics(root, sparsity); + + return root.setDataCharacteristics(matrixCharacteristics); + } + + @Override + public double estim(MatrixBlock m1, MatrixBlock m2) { + return estim(m1, m2, OpCode.MM); + } + + @Override + public double estim(MatrixBlock m1, MatrixBlock m2, OpCode op) { + if( isExactMetadataOp(op) ) + return estimExactMetaData(m1.getDataCharacteristics(), + m2.getDataCharacteristics(), op).getSparsity(); + + double[] rsOut = estimIntern(m1, m2, op); + return DoubleStream.of(rsOut).average().orElse(0); + } + + @Override + public double estim(MatrixBlock m1, OpCode op) { + if( isExactMetadataOp(op) ) + return estimExactMetaData(m1.getDataCharacteristics(), null, op).getSparsity(); + throw new NotImplementedException(); + } + + private double[] estimInternMMChain(MMNode node) { + return estimInternMMChain(node, null, null); + } + + private double[] estimInternMMChain(MMNode node, double[] rsRightNeighbor, OpCode opRightNeighbor) { + if(node.isLeaf()) { + MatrixBlock mb = node.getData(); + if(rsRightNeighbor == null) + return getRowWiseSparsityVector(mb); + else + return estimIntern(mb, rsRightNeighbor, opRightNeighbor); + } + switch(node.getOp()) { + case MM: + double[] rsRightNode = estimInternMMChain(node.getRight(), rsRightNeighbor, opRightNeighbor); + return estimInternMMChain(node.getLeft(), rsRightNode, node.getOp()); + case CBIND: + case RBIND: + // consider the current node as new DAG for estimation (cut) + double[] rsOut = estimInternBind(estimInternMMChain(node.getLeft()), + estimInternMMChain(node.getRight()), node.getOp()); + if(rsRightNeighbor != null) { + rsOut = estimInternMM(rsOut, rsRightNeighbor); + } + return rsOut; + default: + throw new NotImplementedException(); + } + } + + private double[] estimIntern(MatrixBlock m1, MatrixBlock m2, OpCode op) { + double[] rsM2 = getRowWiseSparsityVector(m2); + return estimIntern(m1, rsM2, op); + } + + private double[] estimIntern(MatrixBlock m1, double[] rsM2, OpCode op) { + switch(op) { + case MM: + return estimInternMM(m1, rsM2); + case CBIND: + case RBIND: + return estimInternBind(getRowWiseSparsityVector(m1), rsM2, op); + default: + throw new NotImplementedException("Sparsity estimation for operation " + op.toString() + " not supported yet."); + } + } + + // Corresponds to Algorithm 1 in the publication + private double[] estimInternMM(MatrixBlock m1, double[] rsM2) { + double[] rsOut = new double[m1.getNumRows()]; + for(int r = 0; r < m1.getNumRows(); r++) { + int nonZeroCols[] = getNonZeroColumnIndices(m1, r); + double temp = 1; + for(int c : nonZeroCols) { + temp *= (double) 1 - rsM2[c]; + } + rsOut[r] = (double) 1 - temp; + } + return rsOut; + } + + private double[] estimInternMM(double[] rsM1, double[] rsM2) { + double[] rsOut = DoubleStream.of(rsM1).map( + rsM1I -> (double) 1 - DoubleStream.of(rsM2).reduce((double) 1, + (currentVal, rsM2J) -> currentVal * ((double) 1 - (rsM1I * rsM2J)))).toArray(); + return rsOut; + } + + private double[] estimInternBind(double[] rsM1, double[] rsM2, OpCode op) { + switch(op) { + case CBIND: + return IntStream.range(0, rsM1.length) + .mapToDouble(idx -> (double) rsM1[idx] + rsM2[idx]).toArray(); + case RBIND: + return ArrayUtils.addAll(rsM1, rsM2); + default: + throw new DMLRuntimeException("We should never reach this point."); + } + } + + private MatrixCharacteristics getMatrixCharacteristics(MMNode root, double sparsity) { + switch(root.getOp()) { + case MM: + MMNode tmpNode = root; + while(!tmpNode.isLeaf()) { + tmpNode = tmpNode.getLeft(); + } + int numRows = tmpNode.getData().getNumRows(); + tmpNode = root; + while(!tmpNode.isLeaf()) { + tmpNode = tmpNode.getRight(); + } + int numColumns = tmpNode.getData().getNumColumns(); + + return new MatrixCharacteristics( + numRows, numColumns, (long)(numRows * numColumns * sparsity)); + default: + throw new NotImplementedException(); + } + } + + private double[] getRowWiseSparsityVector(MatrixBlock mb) { + int numRows = mb.getNumRows(); + double[] rs = new double[numRows]; + if(mb.isInSparseFormat()) { + for(int counter = 0; counter < numRows; counter++) { + SparseRow sparseRow = mb.getSparseBlock().get(counter); + rs[counter] = (sparseRow == null) ? 0 : (double) sparseRow.size() / mb.getNumColumns(); + } + } + else { + for(int counter = 0; counter < numRows; counter++) { + rs[counter] = (double) mb.getDenseBlock().countNonZeros(counter) / mb.getNumColumns(); + } + } + return rs; + } + + private int[] getNonZeroColumnIndices(MatrixBlock mb, final int rIdx) { + int[] nonZeroCols; + if(mb.isInSparseFormat()) { + SparseRow sparseRow = mb.getSparseBlock().get(rIdx); + nonZeroCols = (sparseRow == null) ? new int[0] : sparseRow.indexes(); + } + else { + nonZeroCols = IntStream.range(0, mb.getNumColumns()) + .filter(cIdx -> mb.get(rIdx, cIdx) != 0).toArray(); + } + return nonZeroCols; + } +}; From 1abb34310323d15debbd5377a4a92b174df34412 Mon Sep 17 00:00:00 2001 From: ywcb00 Date: Thu, 30 Apr 2026 09:46:43 +0200 Subject: [PATCH 2/6] feat(test/estim): add some tests for the row wise sparsity estimator to the unity tests for sparsity estimation --- .../test/component/estim/OpBindChainTest.java | 16 ++++++++++++++-- .../sysds/test/component/estim/OpBindTest.java | 14 +++++++++++++- .../test/component/estim/OuterProductTest.java | 11 +++++++++++ .../test/component/estim/SelfProductTest.java | 11 ++++++++++- .../component/estim/SquaredProductChainTest.java | 13 ++++++++++++- .../test/component/estim/SquaredProductTest.java | 13 ++++++++++++- 6 files changed, 72 insertions(+), 6 deletions(-) diff --git a/src/test/java/org/apache/sysds/test/component/estim/OpBindChainTest.java b/src/test/java/org/apache/sysds/test/component/estim/OpBindChainTest.java index 35efedaf625..4726cf36daa 100644 --- a/src/test/java/org/apache/sysds/test/component/estim/OpBindChainTest.java +++ b/src/test/java/org/apache/sysds/test/component/estim/OpBindChainTest.java @@ -24,6 +24,7 @@ import org.apache.sysds.hops.estim.EstimatorBasicWorst; import org.apache.sysds.hops.estim.EstimatorBitsetMM; import org.apache.sysds.hops.estim.EstimatorMatrixHistogram; +import org.apache.sysds.hops.estim.EstimatorRowWise; import org.apache.sysds.hops.estim.EstimatorLayeredGraph; import org.apache.sysds.hops.estim.MMNode; import org.apache.sysds.hops.estim.SparsityEstimator; @@ -127,8 +128,19 @@ public void testLGCasecbind() { new EstimatorLayeredGraph(EstimatorLayeredGraph.ROUNDS, 3), m, k, n, sparsity, cbind); } - - + + // Row Wise Sparsity Estimator + @Test + public void testRowWiseRbind() { + runSparsityEstimateTest(new EstimatorRowWise(), m, k, n, sparsity, rbind); + } + + @Test + public void testRowWiseCbind() { + runSparsityEstimateTest(new EstimatorRowWise(), m, k, n, sparsity, cbind); + } + + private static void runSparsityEstimateTest(SparsityEstimator estim, int m, int k, int n, double[] sp, OpCode op) { MatrixBlock m1; MatrixBlock m2; diff --git a/src/test/java/org/apache/sysds/test/component/estim/OpBindTest.java b/src/test/java/org/apache/sysds/test/component/estim/OpBindTest.java index 3e7ad24fe86..31a9be713bc 100644 --- a/src/test/java/org/apache/sysds/test/component/estim/OpBindTest.java +++ b/src/test/java/org/apache/sysds/test/component/estim/OpBindTest.java @@ -24,6 +24,7 @@ import org.apache.sysds.hops.estim.EstimatorBasicWorst; import org.apache.sysds.hops.estim.EstimatorBitsetMM; import org.apache.sysds.hops.estim.EstimatorMatrixHistogram; +import org.apache.sysds.hops.estim.EstimatorRowWise; import org.apache.sysds.hops.estim.EstimatorLayeredGraph; import org.apache.sysds.hops.estim.SparsityEstimator; import org.apache.sysds.hops.estim.SparsityEstimator.OpCode; @@ -132,7 +133,18 @@ public void testSampleCaserbind() { public void testSampleCasecbind() { runSparsityEstimateTest(new EstimatorSample(), m, k, n, sparsity, cbind); }*/ - + + // Row Wise Sparsity Estimator + @Test + public void testRowWiseRbind() { + runSparsityEstimateTest(new EstimatorRowWise(), m, k, n, sparsity, rbind); + } + + @Test + public void testRowWiseCbind() { + runSparsityEstimateTest(new EstimatorRowWise(), m, k, n, sparsity, cbind); + } + private static void runSparsityEstimateTest(SparsityEstimator estim, int m, int k, int n, double[] sp, OpCode op) { MatrixBlock m1; diff --git a/src/test/java/org/apache/sysds/test/component/estim/OuterProductTest.java b/src/test/java/org/apache/sysds/test/component/estim/OuterProductTest.java index fdc33d878db..f71d9989ccd 100644 --- a/src/test/java/org/apache/sysds/test/component/estim/OuterProductTest.java +++ b/src/test/java/org/apache/sysds/test/component/estim/OuterProductTest.java @@ -26,6 +26,7 @@ import org.apache.sysds.hops.estim.EstimatorDensityMap; import org.apache.sysds.hops.estim.EstimatorMatrixHistogram; import org.apache.sysds.hops.estim.EstimatorLayeredGraph; +import org.apache.sysds.hops.estim.EstimatorRowWise; import org.apache.sysds.hops.estim.EstimatorSample; import org.apache.sysds.hops.estim.SparsityEstimator; import org.apache.sysds.runtime.instructions.InstructionUtils; @@ -150,6 +151,16 @@ public void testLayeredGraphCase2() { runSparsityEstimateTest(new EstimatorLayeredGraph(), m, k, n, case2); } + @Test + public void testRowWiseCase1() { + runSparsityEstimateTest(new EstimatorRowWise(), m, k, n, case1); + } + + @Test + public void testRowWiseCase2() { + runSparsityEstimateTest(new EstimatorRowWise(), m, k, n, case2); + } + private static void runSparsityEstimateTest(SparsityEstimator estim, int m, int k, int n, double[] sp) { MatrixBlock m1 = MatrixBlock.randOperations(m, k, sp[0], 1, 1, "uniform", 3); MatrixBlock m2 = MatrixBlock.randOperations(k, n, sp[1], 1, 1, "uniform", 3); diff --git a/src/test/java/org/apache/sysds/test/component/estim/SelfProductTest.java b/src/test/java/org/apache/sysds/test/component/estim/SelfProductTest.java index d99f38d939b..2feeae6fc37 100644 --- a/src/test/java/org/apache/sysds/test/component/estim/SelfProductTest.java +++ b/src/test/java/org/apache/sysds/test/component/estim/SelfProductTest.java @@ -28,6 +28,7 @@ import org.apache.sysds.hops.estim.EstimatorDensityMap; import org.apache.sysds.hops.estim.EstimatorLayeredGraph; import org.apache.sysds.hops.estim.EstimatorMatrixHistogram; +import org.apache.sysds.hops.estim.EstimatorRowWise; import org.apache.sysds.hops.estim.EstimatorSample; import org.apache.sysds.hops.estim.EstimatorSampleRa; import org.apache.sysds.hops.estim.SparsityEstimator; @@ -156,7 +157,15 @@ public void testLayeredGraphCase1() { public void testLayeredGraphCase2() { runSparsityEstimateTest(new EstimatorLayeredGraph(), m, sparsity2); } - + + @Test + public void testRowWiseCase() { + runSparsityEstimateTest(new EstimatorRowWise(), m/4, sparsity0); + runSparsityEstimateTest(new EstimatorRowWise(), m/2, sparsity1); + runSparsityEstimateTest(new EstimatorRowWise(), m, sparsity2); + runSparsityEstimateTest(new EstimatorRowWise(), m, sparsity3); + } + private static void runSparsityEstimateTest(SparsityEstimator estim, int n, double sp) { MatrixBlock m1 = MatrixBlock.randOperations(n, n, sp, 1, 1, "uniform", 3); MatrixBlock m3 = m1.aggregateBinaryOperations(m1, m1, diff --git a/src/test/java/org/apache/sysds/test/component/estim/SquaredProductChainTest.java b/src/test/java/org/apache/sysds/test/component/estim/SquaredProductChainTest.java index f799b02c96d..502ed62de29 100644 --- a/src/test/java/org/apache/sysds/test/component/estim/SquaredProductChainTest.java +++ b/src/test/java/org/apache/sysds/test/component/estim/SquaredProductChainTest.java @@ -26,6 +26,7 @@ import org.apache.sysds.hops.estim.EstimatorDensityMap; import org.apache.sysds.hops.estim.EstimatorLayeredGraph; import org.apache.sysds.hops.estim.EstimatorMatrixHistogram; +import org.apache.sysds.hops.estim.EstimatorRowWise; import org.apache.sysds.hops.estim.MMNode; import org.apache.sysds.hops.estim.SparsityEstimator; import org.apache.sysds.hops.estim.SparsityEstimator.OpCode; @@ -146,7 +147,17 @@ public void testLayeredGraph32Case1() { public void testLayeredGraph32Case2() { runSparsityEstimateTest(new EstimatorLayeredGraph(32), m, k, n, n2, case2); } - + + @Test + public void testRowWiseCase1() { + runSparsityEstimateTest(new EstimatorRowWise(), m, k, n, n2, case1); + } + + @Test + public void testRowWiseCase2() { + runSparsityEstimateTest(new EstimatorRowWise(), m, k, n, n2, case2); + } + private static void runSparsityEstimateTest(SparsityEstimator estim, int m, int k, int n, int n2, double[] sp) { MatrixBlock m1 = MatrixBlock.randOperations(m, k, sp[0], 1, 1, "uniform", 1); MatrixBlock m2 = MatrixBlock.randOperations(k, n, sp[1], 1, 1, "uniform", 2); diff --git a/src/test/java/org/apache/sysds/test/component/estim/SquaredProductTest.java b/src/test/java/org/apache/sysds/test/component/estim/SquaredProductTest.java index 2a898f9c39f..678c5daa31a 100644 --- a/src/test/java/org/apache/sysds/test/component/estim/SquaredProductTest.java +++ b/src/test/java/org/apache/sysds/test/component/estim/SquaredProductTest.java @@ -25,6 +25,7 @@ import org.apache.sysds.hops.estim.EstimatorBitsetMM; import org.apache.sysds.hops.estim.EstimatorDensityMap; import org.apache.sysds.hops.estim.EstimatorMatrixHistogram; +import org.apache.sysds.hops.estim.EstimatorRowWise; import org.apache.sysds.hops.estim.EstimatorLayeredGraph; import org.apache.sysds.hops.estim.EstimatorSample; import org.apache.sysds.hops.estim.SparsityEstimator; @@ -154,7 +155,17 @@ public void testLayeredGraphCase1() { public void testLayeredGraphCase2() { runSparsityEstimateTest(new EstimatorLayeredGraph(), m, k, n, case2); } - + + @Test + public void testRowWiseCase1() { + runSparsityEstimateTest(new EstimatorRowWise(), m, k, n, case1); + } + + @Test + public void testRowWiseCase2() { + runSparsityEstimateTest(new EstimatorRowWise(), m, k, n, case2); + } + private static void runSparsityEstimateTest(SparsityEstimator estim, int m, int k, int n, double[] sp) { MatrixBlock m1 = MatrixBlock.randOperations(m, k, sp[0], 1, 1, "uniform", 3); MatrixBlock m2 = MatrixBlock.randOperations(k, n, sp[1], 1, 1, "uniform", 7); From 71afd61c7b7d9954fd6acfe5a1e0a1040e619e7a Mon Sep 17 00:00:00 2001 From: ywcb00 Date: Mon, 4 May 2026 17:14:25 +0200 Subject: [PATCH 3/6] feat(hops/estim/EstimatorRowWise.java): introduce a separate object container for row wise sparsity vectors to simplify access and allow storing it with chain nodes --- .../sysds/hops/estim/EstimatorRowWise.java | 180 ++++++++++++------ 1 file changed, 124 insertions(+), 56 deletions(-) diff --git a/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java b/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java index 4dc2ef416af..bdd9f85e0e3 100644 --- a/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java +++ b/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java @@ -21,12 +21,13 @@ import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.NotImplementedException; -import org.apache.sysds.runtime.DMLRuntimeException; import org.apache.sysds.runtime.data.SparseRow; import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.runtime.meta.DataCharacteristics; import org.apache.sysds.runtime.meta.MatrixCharacteristics; +import java.util.function.DoubleBinaryOperator; +import java.util.function.DoubleUnaryOperator; import java.util.stream.DoubleStream; import java.util.stream.IntStream; @@ -40,8 +41,8 @@ public class EstimatorRowWise extends SparsityEstimator { @Override public DataCharacteristics estim(MMNode root) { - double[] rsOut = estimInternMMChain(root); - double sparsity = DoubleStream.of(rsOut).average().orElse(0); + estimInternChain(root); + double sparsity = ((RSVector)root.getSynopsis()).avg(); MatrixCharacteristics matrixCharacteristics = getMatrixCharacteristics(root, sparsity); @@ -55,12 +56,13 @@ public double estim(MatrixBlock m1, MatrixBlock m2) { @Override public double estim(MatrixBlock m1, MatrixBlock m2, OpCode op) { - if( isExactMetadataOp(op) ) + if( isExactMetadataOp(op) ) { return estimExactMetaData(m1.getDataCharacteristics(), m2.getDataCharacteristics(), op).getSparsity(); + } - double[] rsOut = estimIntern(m1, m2, op); - return DoubleStream.of(rsOut).average().orElse(0); + RSVector rsOut = estimIntern(m1, m2, op); + return rsOut.avg(); } @Override @@ -70,84 +72,115 @@ public double estim(MatrixBlock m1, OpCode op) { throw new NotImplementedException(); } - private double[] estimInternMMChain(MMNode node) { - return estimInternMMChain(node, null, null); + private void estimInternChain(MMNode node) { + estimInternChain(node, null, null); } - private double[] estimInternMMChain(MMNode node, double[] rsRightNeighbor, OpCode opRightNeighbor) { + private void estimInternChain(MMNode node, RSVector rsRightNeighbor, OpCode opRightNeighbor) { if(node.isLeaf()) { MatrixBlock mb = node.getData(); - if(rsRightNeighbor == null) - return getRowWiseSparsityVector(mb); + RSVector rsOut; + if(rsRightNeighbor != null) + rsOut = estimIntern(mb, rsRightNeighbor, opRightNeighbor); else - return estimIntern(mb, rsRightNeighbor, opRightNeighbor); + rsOut = getRowWiseSparsityVector(mb); + node.setSynopsis(rsOut); + return; } switch(node.getOp()) { case MM: - double[] rsRightNode = estimInternMMChain(node.getRight(), rsRightNeighbor, opRightNeighbor); - return estimInternMMChain(node.getLeft(), rsRightNode, node.getOp()); + estimInternChain(node.getRight(), rsRightNeighbor, opRightNeighbor); + estimInternChain(node.getLeft(), (RSVector)(node.getRight().getSynopsis()), node.getOp()); + node.setSynopsis(node.getLeft().getSynopsis()); + return; case CBIND: + /** NOTE: considering the current node as new DAG for estimation (cut), since the row sparsity of + * the right neighbor cannot be aggregated into a cbind operation when having only row sparsity vectors + */ + estimInternChain(node.getLeft()); + estimInternChain(node.getRight()); + RSVector rsCBind = estimInternCBind((RSVector)(node.getLeft().getSynopsis()), (RSVector)(node.getRight().getSynopsis())); + if(rsRightNeighbor != null) + node.setSynopsis(estimIntern(rsCBind, rsRightNeighbor, opRightNeighbor)); + else + node.setSynopsis(rsCBind); + return; case RBIND: - // consider the current node as new DAG for estimation (cut) - double[] rsOut = estimInternBind(estimInternMMChain(node.getLeft()), - estimInternMMChain(node.getRight()), node.getOp()); - if(rsRightNeighbor != null) { - rsOut = estimInternMM(rsOut, rsRightNeighbor); - } - return rsOut; + /** NOTE: considering the current node as new DAG for estimation (cut), since the row sparsity of + * the right neighbor cannot be aggregated into an rbind operation when having only row sparsity vectors + */ + estimInternChain(node.getLeft()); + estimInternChain(node.getRight()); + RSVector rsRBind = estimInternRBind((RSVector)(node.getLeft().getSynopsis()), (RSVector)(node.getRight().getSynopsis())); + if(rsRightNeighbor != null) + node.setSynopsis(estimIntern(rsRBind, rsRightNeighbor, opRightNeighbor)); + else + node.setSynopsis(rsRBind); + return; default: throw new NotImplementedException(); } } - private double[] estimIntern(MatrixBlock m1, MatrixBlock m2, OpCode op) { - double[] rsM2 = getRowWiseSparsityVector(m2); + private RSVector estimIntern(MatrixBlock m1, MatrixBlock m2, OpCode op) { + RSVector rsM2 = getRowWiseSparsityVector(m2); return estimIntern(m1, rsM2, op); } - private double[] estimIntern(MatrixBlock m1, double[] rsM2, OpCode op) { + private RSVector estimIntern(MatrixBlock m1, RSVector rsM2, OpCode op) { switch(op) { case MM: return estimInternMM(m1, rsM2); case CBIND: + return estimInternCBind(getRowWiseSparsityVector(m1), rsM2); case RBIND: - return estimInternBind(getRowWiseSparsityVector(m1), rsM2, op); + return estimInternRBind(getRowWiseSparsityVector(m1), rsM2); default: throw new NotImplementedException("Sparsity estimation for operation " + op.toString() + " not supported yet."); } } - // Corresponds to Algorithm 1 in the publication - private double[] estimInternMM(MatrixBlock m1, double[] rsM2) { - double[] rsOut = new double[m1.getNumRows()]; - for(int r = 0; r < m1.getNumRows(); r++) { - int nonZeroCols[] = getNonZeroColumnIndices(m1, r); - double temp = 1; - for(int c : nonZeroCols) { - temp *= (double) 1 - rsM2[c]; - } - rsOut[r] = (double) 1 - temp; + private RSVector estimIntern(RSVector rsM1, RSVector rsM2, OpCode op) { + switch(op) { + case MM: + return estimInternMM(rsM1, rsM2); + // case CBIND: + // return estimInternCBind(rsM1, rsM2); + // case RBIND: + // return estimInternRBind(rsM1, rsM2); + default: + throw new NotImplementedException("Sparsity estimation for operation " + op.toString() + " not supported yet."); } + } + + // Corresponds to Algorithm 1 in the publication + private RSVector estimInternMM(MatrixBlock m1, RSVector rsM2) { + RSVector rsOut = new RSVector(IntStream.range(0, m1.getNumRows()).mapToDouble( + r -> (double) 1 - IntStream.of(getNonZeroColumnIndices(m1, r)).mapToDouble( + c -> (double) 1 - rsM2.get(c) + ).reduce((double) 1, (currentVal, val) -> currentVal * val)) + .toArray()); return rsOut; } - private double[] estimInternMM(double[] rsM1, double[] rsM2) { - double[] rsOut = DoubleStream.of(rsM1).map( - rsM1I -> (double) 1 - DoubleStream.of(rsM2).reduce((double) 1, - (currentVal, rsM2J) -> currentVal * ((double) 1 - (rsM1I * rsM2J)))).toArray(); + // NOTE: this is the best estimation possible when we only have the two row sparsity vectors + private RSVector estimInternMM(RSVector rsM1, RSVector rsM2) { + // double avgRsM2 = DoubleStream.of(rsM2).average().orElse(0); + // RSVector rsOut = DoubleStream.of(rsM1).map( + // rsM1I -> (double) 1 - Math.pow((double) 1 - (rsM1I * avgRsM2), rsM2.length)).toArray(); + RSVector rsOut = rsM1.map( + rsM1I -> (double) 1 - rsM2.reduce((double) 1, + (currentVal, rsM2J) -> currentVal * ((double) 1 - (rsM1I * rsM2J)))); return rsOut; } - private double[] estimInternBind(double[] rsM1, double[] rsM2, OpCode op) { - switch(op) { - case CBIND: - return IntStream.range(0, rsM1.length) - .mapToDouble(idx -> (double) rsM1[idx] + rsM2[idx]).toArray(); - case RBIND: - return ArrayUtils.addAll(rsM1, rsM2); - default: - throw new DMLRuntimeException("We should never reach this point."); - } + private RSVector estimInternCBind(RSVector rsM1, RSVector rsM2) { + return new RSVector(IntStream.range(0, rsM1.size()).mapToDouble( + idx -> (rsM1.get(idx) + rsM2.get(idx)) / (double) 2).toArray()); + } + + private RSVector estimInternRBind(RSVector rsM1, RSVector rsM2) { + return rsM1.append(rsM2); } private MatrixCharacteristics getMatrixCharacteristics(MMNode root, double sparsity) { @@ -171,21 +204,20 @@ private MatrixCharacteristics getMatrixCharacteristics(MMNode root, double spars } } - private double[] getRowWiseSparsityVector(MatrixBlock mb) { + private RSVector getRowWiseSparsityVector(MatrixBlock mb) { int numRows = mb.getNumRows(); - double[] rs = new double[numRows]; if(mb.isInSparseFormat()) { + double[] rsArray = new double[numRows]; for(int counter = 0; counter < numRows; counter++) { SparseRow sparseRow = mb.getSparseBlock().get(counter); - rs[counter] = (sparseRow == null) ? 0 : (double) sparseRow.size() / mb.getNumColumns(); + rsArray[counter] = (sparseRow == null) ? 0 : (double) sparseRow.size() / mb.getNumColumns(); } + return new RSVector(rsArray); } else { - for(int counter = 0; counter < numRows; counter++) { - rs[counter] = (double) mb.getDenseBlock().countNonZeros(counter) / mb.getNumColumns(); - } + return new RSVector(IntStream.range(0, numRows).mapToDouble( + rIdx -> (double) mb.getDenseBlock().countNonZeros(rIdx) / mb.getNumColumns()).toArray()); } - return rs; } private int[] getNonZeroColumnIndices(MatrixBlock mb, final int rIdx) { @@ -200,4 +232,40 @@ private int[] getNonZeroColumnIndices(MatrixBlock mb, final int rIdx) { } return nonZeroCols; } + + public static class RSVector { + private final double[] rs; + + public RSVector(double[] rs) { + this.rs = rs; + } + + public double[] get() { + return this.rs; + } + + public double get(int idx) { + return this.rs[idx]; + } + + public int size() { + return this.rs.length; + } + + public double avg() { + return DoubleStream.of(this.rs).average().orElse(0); + } + + public RSVector append(RSVector that) { + return new RSVector(ArrayUtils.addAll(this.rs, that.get())); + } + + public RSVector map(DoubleUnaryOperator mapper) { + return new RSVector(DoubleStream.of(this.rs).map(mapper).toArray()); + } + + public double reduce(double identity, DoubleBinaryOperator op) { + return DoubleStream.of(this.rs).reduce(identity, op); + } + }; }; From ef4a026d0ec2c15eb6401f508d041afc15951bd2 Mon Sep 17 00:00:00 2001 From: ywcb00 Date: Tue, 5 May 2026 16:00:10 +0200 Subject: [PATCH 4/6] fix(hops/estim/EstimatorRowWise.java): fix derivation of output data characteristics --- .../sysds/hops/estim/EstimatorRowWise.java | 138 ++++++++++-------- 1 file changed, 79 insertions(+), 59 deletions(-) diff --git a/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java b/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java index bdd9f85e0e3..ef951385948 100644 --- a/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java +++ b/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java @@ -21,6 +21,7 @@ import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.NotImplementedException; +import org.apache.sysds.hops.OptimizerUtils; import org.apache.sysds.runtime.data.SparseRow; import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.runtime.meta.DataCharacteristics; @@ -44,9 +45,8 @@ public DataCharacteristics estim(MMNode root) { estimInternChain(root); double sparsity = ((RSVector)root.getSynopsis()).avg(); - MatrixCharacteristics matrixCharacteristics = getMatrixCharacteristics(root, sparsity); - - return root.setDataCharacteristics(matrixCharacteristics); + DataCharacteristics outputCharacteristics = deriveOutputCharacteristics(root, sparsity); + return root.setDataCharacteristics(outputCharacteristics); } @Override @@ -77,49 +77,53 @@ private void estimInternChain(MMNode node) { } private void estimInternChain(MMNode node, RSVector rsRightNeighbor, OpCode opRightNeighbor) { + RSVector rsOut; if(node.isLeaf()) { MatrixBlock mb = node.getData(); - RSVector rsOut; if(rsRightNeighbor != null) rsOut = estimIntern(mb, rsRightNeighbor, opRightNeighbor); else rsOut = getRowWiseSparsityVector(mb); - node.setSynopsis(rsOut); - return; } - switch(node.getOp()) { - case MM: - estimInternChain(node.getRight(), rsRightNeighbor, opRightNeighbor); - estimInternChain(node.getLeft(), (RSVector)(node.getRight().getSynopsis()), node.getOp()); - node.setSynopsis(node.getLeft().getSynopsis()); - return; - case CBIND: - /** NOTE: considering the current node as new DAG for estimation (cut), since the row sparsity of - * the right neighbor cannot be aggregated into a cbind operation when having only row sparsity vectors - */ - estimInternChain(node.getLeft()); - estimInternChain(node.getRight()); - RSVector rsCBind = estimInternCBind((RSVector)(node.getLeft().getSynopsis()), (RSVector)(node.getRight().getSynopsis())); - if(rsRightNeighbor != null) - node.setSynopsis(estimIntern(rsCBind, rsRightNeighbor, opRightNeighbor)); - else - node.setSynopsis(rsCBind); - return; - case RBIND: - /** NOTE: considering the current node as new DAG for estimation (cut), since the row sparsity of - * the right neighbor cannot be aggregated into an rbind operation when having only row sparsity vectors - */ - estimInternChain(node.getLeft()); - estimInternChain(node.getRight()); - RSVector rsRBind = estimInternRBind((RSVector)(node.getLeft().getSynopsis()), (RSVector)(node.getRight().getSynopsis())); - if(rsRightNeighbor != null) - node.setSynopsis(estimIntern(rsRBind, rsRightNeighbor, opRightNeighbor)); - else - node.setSynopsis(rsRBind); - return; - default: - throw new NotImplementedException(); + else { + switch(node.getOp()) { + case MM: + estimInternChain(node.getRight(), rsRightNeighbor, opRightNeighbor); + estimInternChain(node.getLeft(), (RSVector)(node.getRight().getSynopsis()), node.getOp()); + rsOut = (RSVector)node.getLeft().getSynopsis(); + break; + case CBIND: + /** NOTE: considering the current node as new DAG for estimation (cut), since the row sparsity of + * the right neighbor cannot be aggregated into a cbind operation when having only row sparsity vectors + */ + estimInternChain(node.getLeft()); + estimInternChain(node.getRight()); + RSVector rsCBind = estimInternCBind((RSVector)(node.getLeft().getSynopsis()), (RSVector)(node.getRight().getSynopsis())); + if(rsRightNeighbor != null) + rsOut = (RSVector)estimIntern(rsCBind, rsRightNeighbor, opRightNeighbor); + else + rsOut = (RSVector)rsCBind; + break; + case RBIND: + /** NOTE: considering the current node as new DAG for estimation (cut), since the row sparsity of + * the right neighbor cannot be aggregated into an rbind operation when having only row sparsity vectors + */ + estimInternChain(node.getLeft()); + estimInternChain(node.getRight()); + RSVector rsRBind = estimInternRBind((RSVector)(node.getLeft().getSynopsis()), (RSVector)(node.getRight().getSynopsis())); + if(rsRightNeighbor != null) + rsOut = (RSVector)estimIntern(rsRBind, rsRightNeighbor, opRightNeighbor); + else + rsOut = (RSVector)rsRBind; + break; + default: + throw new NotImplementedException("Chain estimation for operator " + node.getOp().toString() + + " is not supported yet."); + } } + node.setSynopsis(rsOut); + node.setDataCharacteristics(deriveOutputCharacteristics(node, rsOut.avg())); + return; } private RSVector estimIntern(MatrixBlock m1, MatrixBlock m2, OpCode op) { @@ -183,27 +187,6 @@ private RSVector estimInternRBind(RSVector rsM1, RSVector rsM2) { return rsM1.append(rsM2); } - private MatrixCharacteristics getMatrixCharacteristics(MMNode root, double sparsity) { - switch(root.getOp()) { - case MM: - MMNode tmpNode = root; - while(!tmpNode.isLeaf()) { - tmpNode = tmpNode.getLeft(); - } - int numRows = tmpNode.getData().getNumRows(); - tmpNode = root; - while(!tmpNode.isLeaf()) { - tmpNode = tmpNode.getRight(); - } - int numColumns = tmpNode.getData().getNumColumns(); - - return new MatrixCharacteristics( - numRows, numColumns, (long)(numRows * numColumns * sparsity)); - default: - throw new NotImplementedException(); - } - } - private RSVector getRowWiseSparsityVector(MatrixBlock mb) { int numRows = mb.getNumRows(); if(mb.isInSparseFormat()) { @@ -233,6 +216,43 @@ private int[] getNonZeroColumnIndices(MatrixBlock mb, final int rIdx) { return nonZeroCols; } + public static DataCharacteristics deriveOutputCharacteristics(MMNode node, double spOut) { + if(node.isLeaf() || + (node.getDataCharacteristics() != null && node.getDataCharacteristics().getNonZeros() != -1)) { + return node.getDataCharacteristics(); + } + + MMNode nodeLeft = node.getLeft(); + MMNode nodeRight = node.getRight(); + switch(node.getOp()) { + case MM: + return new MatrixCharacteristics(nodeLeft.getRows(), nodeRight.getCols(), + OptimizerUtils.getNnz(nodeLeft.getRows(), nodeRight.getCols(), spOut)); + case MULT: + case PLUS: + case NEQZERO: + case EQZERO: + return new MatrixCharacteristics(nodeLeft.getRows(), nodeLeft.getCols(), + OptimizerUtils.getNnz(nodeLeft.getRows(), nodeLeft.getCols(), spOut)); + case RBIND: + return new MatrixCharacteristics(nodeLeft.getRows()+nodeLeft.getRows(), nodeLeft.getCols(), + OptimizerUtils.getNnz(nodeLeft.getRows()+nodeRight.getRows(), nodeLeft.getCols(), spOut)); + case CBIND: + return new MatrixCharacteristics(nodeLeft.getRows(), nodeLeft.getCols()+nodeRight.getCols(), + OptimizerUtils.getNnz(nodeLeft.getRows(), nodeLeft.getCols()+nodeRight.getCols(), spOut)); + case DIAG: + int ncol = nodeLeft.getCols()==1 ? nodeLeft.getRows() : 1; + return new MatrixCharacteristics(nodeLeft.getRows(), ncol, + OptimizerUtils.getNnz(nodeLeft.getRows(), ncol, spOut)); + case TRANS: + case RESHAPE: + throw new NotImplementedException("Characteristics derivation for trans and reshape has not been " + + "implemented yet, but could be implemented similar to EstimatorMatrixHistogram.java"); + default: + throw new NotImplementedException(); + } + } + public static class RSVector { private final double[] rs; From da4c2c5adf8e972ff04833fc58c4ca8acacf1ef9 Mon Sep 17 00:00:00 2001 From: ywcb00 Date: Wed, 6 May 2026 16:45:36 +0200 Subject: [PATCH 5/6] feat(test/component/estim): add unit tests for row wise sparsity estimator with element-wise and single operations --- .../component/estim/OpElemWChainTest.java | 15 +++++++- .../test/component/estim/OpElemWTest.java | 14 ++++++- .../test/component/estim/OpSingleTest.java | 37 +++++++++++++++---- 3 files changed, 55 insertions(+), 11 deletions(-) diff --git a/src/test/java/org/apache/sysds/test/component/estim/OpElemWChainTest.java b/src/test/java/org/apache/sysds/test/component/estim/OpElemWChainTest.java index a1b6594a927..2388f50d50e 100644 --- a/src/test/java/org/apache/sysds/test/component/estim/OpElemWChainTest.java +++ b/src/test/java/org/apache/sysds/test/component/estim/OpElemWChainTest.java @@ -25,6 +25,7 @@ import org.apache.sysds.hops.estim.EstimatorBitsetMM; import org.apache.sysds.hops.estim.EstimatorLayeredGraph; import org.apache.sysds.hops.estim.EstimatorMatrixHistogram; +import org.apache.sysds.hops.estim.EstimatorRowWise; import org.apache.sysds.hops.estim.EstimatorDensityMap; import org.apache.sysds.hops.estim.MMNode; import org.apache.sysds.hops.estim.SparsityEstimator; @@ -118,8 +119,18 @@ public void testLGCasemult() { public void testLGCaseplus() { runSparsityEstimateTest(new EstimatorLayeredGraph(), m, n, sparsity, plus); } - - + + // Row Wise Sparsity Estimator + @Test + public void testRowWiseCaseMult() { + runSparsityEstimateTest(new EstimatorRowWise(), m, n, sparsity, mult); + } + + @Test + public void testRowWiseCasePlus() { + runSparsityEstimateTest(new EstimatorRowWise(), m, n, sparsity, plus); + } + private static void runSparsityEstimateTest(SparsityEstimator estim, int m, int n, double[] sp, OpCode op) { MatrixBlock m1 = MatrixBlock.randOperations(m, n, sp[0], 1, 1, "uniform", 3); MatrixBlock m2 = MatrixBlock.randOperations(m, n, sp[1], 1, 1, "uniform", 5); diff --git a/src/test/java/org/apache/sysds/test/component/estim/OpElemWTest.java b/src/test/java/org/apache/sysds/test/component/estim/OpElemWTest.java index f8ddb91bcef..8d9710dafb1 100644 --- a/src/test/java/org/apache/sysds/test/component/estim/OpElemWTest.java +++ b/src/test/java/org/apache/sysds/test/component/estim/OpElemWTest.java @@ -25,6 +25,7 @@ import org.apache.sysds.hops.estim.EstimatorBitsetMM; import org.apache.sysds.hops.estim.EstimatorDensityMap; import org.apache.sysds.hops.estim.EstimatorMatrixHistogram; +import org.apache.sysds.hops.estim.EstimatorRowWise; import org.apache.sysds.hops.estim.EstimatorLayeredGraph; import org.apache.sysds.hops.estim.EstimatorSample; import org.apache.sysds.hops.estim.SparsityEstimator; @@ -128,7 +129,18 @@ public void testSampleMult() { public void testSamplePlus() { runSparsityEstimateTest(new EstimatorSample(), m, n, sparsity, plus); } - + + // Row Wise Sparsity Estimator + @Test + public void testRowWiseMult() { + runSparsityEstimateTest(new EstimatorRowWise(), m, n, sparsity, mult); + } + + @Test + public void testRowWisePlus() { + runSparsityEstimateTest(new EstimatorRowWise(), m, n, sparsity, plus); + } + private static void runSparsityEstimateTest(SparsityEstimator estim, int m, int n, double[] sp, OpCode op) { MatrixBlock m1 = MatrixBlock.randOperations(m, n, sp[0], 1, 1, "uniform", 3); MatrixBlock m2 = MatrixBlock.randOperations(m, n, sp[1], 1, 1, "uniform", 7); diff --git a/src/test/java/org/apache/sysds/test/component/estim/OpSingleTest.java b/src/test/java/org/apache/sysds/test/component/estim/OpSingleTest.java index d40f84c4fb3..1e39847ab37 100644 --- a/src/test/java/org/apache/sysds/test/component/estim/OpSingleTest.java +++ b/src/test/java/org/apache/sysds/test/component/estim/OpSingleTest.java @@ -26,6 +26,7 @@ import org.apache.sysds.hops.estim.EstimatorBasicWorst; import org.apache.sysds.hops.estim.EstimatorBitsetMM; import org.apache.sysds.hops.estim.EstimatorLayeredGraph; +import org.apache.sysds.hops.estim.EstimatorRowWise; import org.apache.sysds.hops.estim.SparsityEstimator; import org.apache.sysds.hops.estim.SparsityEstimator.OpCode; import org.apache.sysds.runtime.matrix.data.MatrixBlock; @@ -40,7 +41,7 @@ public class OpSingleTest extends AutomatedTestBase private final static int m = 600; private final static int k = 300; private final static double sparsity = 0.2; -// private final static OpCode eqzero = OpCode.EQZERO; + // private final static OpCode eqzero = OpCode.EQZERO; private final static OpCode diag = OpCode.DIAG; private final static OpCode neqzero = OpCode.NEQZERO; private final static OpCode trans = OpCode.TRANS; @@ -237,7 +238,33 @@ public void testLGCasetrans() { // public void testSampleCasereshape() { // runSparsityEstimateTest(new EstimatorSample(), m, k, sparsity, reshape); // } - + + // Row Wise Sparsity Estimator + // @Test + // public void testRowWiseEqzero() { + // runSparsityEstimateTest(new EstimatorRowWise(), m, k, sparsity, eqzero); + // } + + // @Test + // public void testRowWiseDiag() { + // runSparsityEstimateTest(new EstimatorRowWise(), m, m, sparsity, diag); + // } + + @Test + public void testRowWiseNeqzero() { + runSparsityEstimateTest(new EstimatorRowWise(), m, k, sparsity, neqzero); + } + + @Test + public void testRowWiseTrans() { + runSparsityEstimateTest(new EstimatorRowWise(), m, k, sparsity, trans); + } + + @Test + public void testRowWiseReshape() { + runSparsityEstimateTest(new EstimatorRowWise(), m, k, sparsity, reshape); + } + private static void runSparsityEstimateTest(SparsityEstimator estim, int m, int k, double sp, OpCode op) { MatrixBlock m1 = MatrixBlock.randOperations(m, k, sp, 1, 1, "uniform", 3); MatrixBlock m2 = new MatrixBlock(); @@ -252,13 +279,7 @@ private static void runSparsityEstimateTest(SparsityEstimator estim, int m, int est = estim.estim(m1, op); break; case NEQZERO: - m2 = m1; - est = estim.estim(m1, op); - break; case TRANS: - m2 = m1; - est = estim.estim(m1, op); - break; case RESHAPE: m2 = m1; est = estim.estim(m1, op); From 9aa7d526720623134d546a8bdd4c3d39d174487b Mon Sep 17 00:00:00 2001 From: ywcb00 Date: Wed, 6 May 2026 16:47:21 +0200 Subject: [PATCH 6/6] feat(main/hops/estim/EstimatorRowWise.java): add support for element-wise and single operations NOTE: using average case estimation per row --- .../sysds/hops/estim/EstimatorRowWise.java | 97 +++++++++++++++---- 1 file changed, 78 insertions(+), 19 deletions(-) diff --git a/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java b/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java index ef951385948..eaffc520fc6 100644 --- a/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java +++ b/src/main/java/org/apache/sysds/hops/estim/EstimatorRowWise.java @@ -49,7 +49,7 @@ public DataCharacteristics estim(MMNode root) { return root.setDataCharacteristics(outputCharacteristics); } - @Override + @Override public double estim(MatrixBlock m1, MatrixBlock m2) { return estim(m1, m2, OpCode.MM); } @@ -99,8 +99,12 @@ private void estimInternChain(MMNode node, RSVector rsRightNeighbor, OpCode opRi estimInternChain(node.getLeft()); estimInternChain(node.getRight()); RSVector rsCBind = estimInternCBind((RSVector)(node.getLeft().getSynopsis()), (RSVector)(node.getRight().getSynopsis())); - if(rsRightNeighbor != null) - rsOut = (RSVector)estimIntern(rsCBind, rsRightNeighbor, opRightNeighbor); + if(rsRightNeighbor != null) { + rsOut = (RSVector)estimInternMMFallback(rsCBind, rsRightNeighbor); + if(opRightNeighbor != OpCode.MM) + throw new NotImplementedException("Fallback sparsity estimation has only been " + + "considered for MM operation w/ right neighbor, yet"); + } else rsOut = (RSVector)rsCBind; break; @@ -111,11 +115,47 @@ private void estimInternChain(MMNode node, RSVector rsRightNeighbor, OpCode opRi estimInternChain(node.getLeft()); estimInternChain(node.getRight()); RSVector rsRBind = estimInternRBind((RSVector)(node.getLeft().getSynopsis()), (RSVector)(node.getRight().getSynopsis())); - if(rsRightNeighbor != null) - rsOut = (RSVector)estimIntern(rsRBind, rsRightNeighbor, opRightNeighbor); + if(rsRightNeighbor != null) { + rsOut = (RSVector)estimInternMMFallback(rsRBind, rsRightNeighbor); + if(opRightNeighbor != OpCode.MM) + throw new NotImplementedException("Fallback sparsity estimation has only been " + + "considered for MM operation w/ right neighbor, yet"); + } else rsOut = (RSVector)rsRBind; break; + case PLUS: + /** NOTE: considering the current node as new DAG for estimation (cut), since the row sparsity of + * the right neighbor cannot be aggregated into an element-wise operation when having only row sparsity vectors + */ + estimInternChain(node.getLeft()); + estimInternChain(node.getRight()); + RSVector rsPlus = estimInternPlus((RSVector)(node.getLeft().getSynopsis()), (RSVector)(node.getRight().getSynopsis())); + if(rsRightNeighbor != null) { + rsOut = (RSVector)estimInternMMFallback(rsPlus, rsRightNeighbor); + if(opRightNeighbor != OpCode.MM) + throw new NotImplementedException("Fallback sparsity estimation has only been " + + "considered for MM operation w/ right neighbor, yet"); + } + else + rsOut = (RSVector)rsPlus; + break; + case MULT: + /** NOTE: considering the current node as new DAG for estimation (cut), since the row sparsity of + * the right neighbor cannot be aggregated into an element-wise operation when having only row sparsity vectors + */ + estimInternChain(node.getLeft()); + estimInternChain(node.getRight()); + RSVector rsMult = estimInternMult((RSVector)(node.getLeft().getSynopsis()), (RSVector)(node.getRight().getSynopsis())); + if(rsRightNeighbor != null) { + rsOut = (RSVector)estimInternMMFallback(rsMult, rsRightNeighbor); + if(opRightNeighbor != OpCode.MM) + throw new NotImplementedException("Fallback sparsity estimation has only been " + + "considered for MM operation w/ right neighbor, yet"); + } + else + rsOut = (RSVector)rsMult; + break; default: throw new NotImplementedException("Chain estimation for operator " + node.getOp().toString() + " is not supported yet."); @@ -139,19 +179,10 @@ private RSVector estimIntern(MatrixBlock m1, RSVector rsM2, OpCode op) { return estimInternCBind(getRowWiseSparsityVector(m1), rsM2); case RBIND: return estimInternRBind(getRowWiseSparsityVector(m1), rsM2); - default: - throw new NotImplementedException("Sparsity estimation for operation " + op.toString() + " not supported yet."); - } - } - - private RSVector estimIntern(RSVector rsM1, RSVector rsM2, OpCode op) { - switch(op) { - case MM: - return estimInternMM(rsM1, rsM2); - // case CBIND: - // return estimInternCBind(rsM1, rsM2); - // case RBIND: - // return estimInternRBind(rsM1, rsM2); + case PLUS: + return estimInternPlus(getRowWiseSparsityVector(m1), rsM2); + case MULT: + return estimInternMult(getRowWiseSparsityVector(m1), rsM2); default: throw new NotImplementedException("Sparsity estimation for operation " + op.toString() + " not supported yet."); } @@ -168,7 +199,8 @@ private RSVector estimInternMM(MatrixBlock m1, RSVector rsM2) { } // NOTE: this is the best estimation possible when we only have the two row sparsity vectors - private RSVector estimInternMM(RSVector rsM1, RSVector rsM2) { + private RSVector estimInternMMFallback(RSVector rsM1, RSVector rsM2) { + // NOTE: Considering the average would probably not be far off while saving computing time // double avgRsM2 = DoubleStream.of(rsM2).average().orElse(0); // RSVector rsOut = DoubleStream.of(rsM1).map( // rsM1I -> (double) 1 - Math.pow((double) 1 - (rsM1I * avgRsM2), rsM2.length)).toArray(); @@ -187,6 +219,18 @@ private RSVector estimInternRBind(RSVector rsM1, RSVector rsM2) { return rsM1.append(rsM2); } + private RSVector estimInternPlus(RSVector rsM1, RSVector rsM2) { + // row-wise average case estimates + // rsM1 + rsM2 - (rsM1 * rsM2) + return rsM1.add(rsM2).subtract(rsM1.multiply(rsM2)); + } + + private RSVector estimInternMult(RSVector rsM1, RSVector rsM2) { + // row-wise average case estimates + // rsM1 * rsM2 + return rsM1.multiply(rsM2); + } + private RSVector getRowWiseSparsityVector(MatrixBlock mb) { int numRows = mb.getNumRows(); if(mb.isInSparseFormat()) { @@ -287,5 +331,20 @@ public RSVector map(DoubleUnaryOperator mapper) { public double reduce(double identity, DoubleBinaryOperator op) { return DoubleStream.of(this.rs).reduce(identity, op); } + + public RSVector add(RSVector that) { + return new RSVector(IntStream.range(0, this.size()).mapToDouble( + idx -> this.get(idx) + that.get(idx)).toArray()); + } + + public RSVector subtract(RSVector that) { + return new RSVector(IntStream.range(0, this.size()).mapToDouble( + idx -> this.get(idx) - that.get(idx)).toArray()); + } + + public RSVector multiply(RSVector that) { + return new RSVector(IntStream.range(0, this.size()).mapToDouble( + idx -> this.get(idx) * that.get(idx)).toArray()); + } }; };