From 02aa8da2b64a17bd2f31a562416cc25874c64d6c Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Fri, 26 Jun 2026 01:52:02 -0700 Subject: [PATCH] test(workflow-operator): add unit test coverage for Sklearn training ensemble & linear descriptors --- ...rnTrainingAdaptiveBoostingOpDescSpec.scala | 79 +++++++++++++++++++ .../SklearnTrainingBaggingOpDescSpec.scala | 79 +++++++++++++++++++ ...rnTrainingGradientBoostingOpDescSpec.scala | 79 +++++++++++++++++++ ...rnTrainingLinearRegressionOpDescSpec.scala | 79 +++++++++++++++++++ ...ainingLogisticRegressionCVOpDescSpec.scala | 79 +++++++++++++++++++ ...TrainingLogisticRegressionOpDescSpec.scala | 79 +++++++++++++++++++ ...nTrainingPassiveAggressiveOpDescSpec.scala | 79 +++++++++++++++++++ .../SklearnTrainingPerceptronOpDescSpec.scala | 79 +++++++++++++++++++ .../SklearnTrainingSDGOpDescSpec.scala | 79 +++++++++++++++++++ 9 files changed, 711 insertions(+) create mode 100644 common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingAdaptiveBoostingOpDescSpec.scala create mode 100644 common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingBaggingOpDescSpec.scala create mode 100644 common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingGradientBoostingOpDescSpec.scala create mode 100644 common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingLinearRegressionOpDescSpec.scala create mode 100644 common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingLogisticRegressionCVOpDescSpec.scala create mode 100644 common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingLogisticRegressionOpDescSpec.scala create mode 100644 
common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingPassiveAggressiveOpDescSpec.scala create mode 100644 common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingPerceptronOpDescSpec.scala create mode 100644 common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingSDGOpDescSpec.scala diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingAdaptiveBoostingOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingAdaptiveBoostingOpDescSpec.scala new file mode 100644 index 00000000000..5eeeae389d9 --- /dev/null +++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingAdaptiveBoostingOpDescSpec.scala @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.apache.texera.amber.operator.sklearn.training
+
+import org.apache.texera.amber.core.tuple.AttributeType
+import org.apache.texera.amber.operator.LogicalOp
+import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+
+class SklearnTrainingAdaptiveBoostingOpDescSpec extends AnyFlatSpec with Matchers {
+  // Exercises the descriptor only: the generated Python is checked as text, not executed.
+  "SklearnTrainingAdaptiveBoostingOpDesc.operatorInfo" should
+    "advertise the model name, Sklearn Training group, and the single training port" in {
+    val opInfo = new SklearnTrainingAdaptiveBoostingOpDesc().operatorInfo
+    opInfo.userFriendlyName shouldBe "Training: Adaptive Boosting"
+    opInfo.operatorDescription shouldBe "Sklearn Training: Adaptive Boosting Operator"
+    opInfo.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_TRAINING_GROUP
+    opInfo.inputPorts.map(port => port.displayName) shouldBe List("training")
+    opInfo.outputPorts should have length 1
+    opInfo.outputPorts.head.blocking shouldBe true
+  }
+
+  "SklearnTrainingAdaptiveBoostingOpDesc" should "default its config fields" in {
+    val desc = new SklearnTrainingAdaptiveBoostingOpDesc()
+    desc.countVectorizer shouldBe false
+    desc.tfidfTransformer shouldBe false
+    desc.target shouldBe null
+    desc.text shouldBe null
+  }
+
+  "SklearnTrainingAdaptiveBoostingOpDesc.getOutputSchemas" should
+    "emit the model_name/model schema keyed by the declared output port" in {
+    val desc = new SklearnTrainingAdaptiveBoostingOpDesc()
+    val outputSchema = desc.getOutputSchemas(Map.empty)(desc.operatorInfo.outputPorts.head.id)
+    outputSchema.getAttribute("model_name").getType shouldBe AttributeType.STRING
+    outputSchema.getAttribute("model").getType shouldBe AttributeType.BINARY
+  }
+
+  "SklearnTrainingAdaptiveBoostingOpDesc.generatePythonCode" should "import the configured sklearn estimator" in {
+    val desc = new SklearnTrainingAdaptiveBoostingOpDesc()
+    desc.target = "y"
+    val pythonCode = desc.generatePythonCode()
+    pythonCode should include("from sklearn.ensemble import AdaBoostClassifier")
+    pythonCode should include("make_pipeline")
+    pythonCode should include("Training: Adaptive Boosting")
+  }
+
+  "SklearnTrainingAdaptiveBoostingOpDesc" should "round-trip its config fields through the polymorphic base" in {
+    val desc = new SklearnTrainingAdaptiveBoostingOpDesc()
+    desc.target = "label"
+    desc.countVectorizer = true
+    val serialized = objectMapper.writeValueAsString(desc)
+    serialized should include("\"operatorType\":\"SklearnTrainingAdaptiveBoosting\"")
+    val decoded = objectMapper.readValue(serialized, classOf[LogicalOp])
+    decoded shouldBe a[SklearnTrainingAdaptiveBoostingOpDesc]
+    val typed = decoded.asInstanceOf[SklearnTrainingAdaptiveBoostingOpDesc]
+    typed.target shouldBe "label"
+    typed.countVectorizer shouldBe true
+  }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingBaggingOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingBaggingOpDescSpec.scala
new file mode 100644
index 00000000000..dc5916b4b2b
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingBaggingOpDescSpec.scala
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.sklearn.training
+
+import org.apache.texera.amber.core.tuple.AttributeType
+import org.apache.texera.amber.operator.LogicalOp
+import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+
+class SklearnTrainingBaggingOpDescSpec extends AnyFlatSpec with Matchers {
+  // Exercises the descriptor only: the generated Python is checked as text, not executed.
+  "SklearnTrainingBaggingOpDesc.operatorInfo" should
+    "advertise the model name, Sklearn Training group, and the single training port" in {
+    val opInfo = new SklearnTrainingBaggingOpDesc().operatorInfo
+    opInfo.userFriendlyName shouldBe "Training: Bagging"
+    opInfo.operatorDescription shouldBe "Sklearn Training: Bagging Operator"
+    opInfo.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_TRAINING_GROUP
+    opInfo.inputPorts.map(port => port.displayName) shouldBe List("training")
+    opInfo.outputPorts should have length 1
+    opInfo.outputPorts.head.blocking shouldBe true
+  }
+
+  "SklearnTrainingBaggingOpDesc" should "default its config fields" in {
+    val desc = new SklearnTrainingBaggingOpDesc()
+    desc.countVectorizer shouldBe false
+    desc.tfidfTransformer shouldBe false
+    desc.target shouldBe null
+    desc.text shouldBe null
+  }
+
+  "SklearnTrainingBaggingOpDesc.getOutputSchemas" should
+    "emit the model_name/model schema keyed by the declared output port" in {
+    val desc = new SklearnTrainingBaggingOpDesc()
+    val outputSchema = desc.getOutputSchemas(Map.empty)(desc.operatorInfo.outputPorts.head.id)
+    outputSchema.getAttribute("model_name").getType shouldBe AttributeType.STRING
+    outputSchema.getAttribute("model").getType shouldBe AttributeType.BINARY
+  }
+
+  "SklearnTrainingBaggingOpDesc.generatePythonCode" should "import the configured sklearn estimator" in {
+    val desc = new SklearnTrainingBaggingOpDesc()
+    desc.target = "y"
+    val pythonCode = desc.generatePythonCode()
+    pythonCode should include("from sklearn.ensemble import BaggingClassifier")
+    pythonCode should include("make_pipeline")
+    pythonCode should include("Training: Bagging")
+  }
+
+  "SklearnTrainingBaggingOpDesc" should "round-trip its config fields through the polymorphic base" in {
+    val desc = new SklearnTrainingBaggingOpDesc()
+    desc.target = "label"
+    desc.countVectorizer = true
+    val serialized = objectMapper.writeValueAsString(desc)
+    serialized should include("\"operatorType\":\"SklearnTrainingBagging\"")
+    val decoded = objectMapper.readValue(serialized, classOf[LogicalOp])
+    decoded shouldBe a[SklearnTrainingBaggingOpDesc]
+    val typed = decoded.asInstanceOf[SklearnTrainingBaggingOpDesc]
+    typed.target shouldBe "label"
+    typed.countVectorizer shouldBe true
+  }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingGradientBoostingOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingGradientBoostingOpDescSpec.scala
new file mode 100644
index 00000000000..dc17a8267a3
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingGradientBoostingOpDescSpec.scala
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.sklearn.training
+
+import org.apache.texera.amber.core.tuple.AttributeType
+import org.apache.texera.amber.operator.LogicalOp
+import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+
+class SklearnTrainingGradientBoostingOpDescSpec extends AnyFlatSpec with Matchers {
+  // Exercises the descriptor only: the generated Python is checked as text, not executed.
+  "SklearnTrainingGradientBoostingOpDesc.operatorInfo" should
+    "advertise the model name, Sklearn Training group, and the single training port" in {
+    val opInfo = new SklearnTrainingGradientBoostingOpDesc().operatorInfo
+    opInfo.userFriendlyName shouldBe "Training: Gradient Boosting"
+    opInfo.operatorDescription shouldBe "Sklearn Training: Gradient Boosting Operator"
+    opInfo.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_TRAINING_GROUP
+    opInfo.inputPorts.map(port => port.displayName) shouldBe List("training")
+    opInfo.outputPorts should have length 1
+    opInfo.outputPorts.head.blocking shouldBe true
+  }
+
+  "SklearnTrainingGradientBoostingOpDesc" should "default its config fields" in {
+    val desc = new SklearnTrainingGradientBoostingOpDesc()
+    desc.countVectorizer shouldBe false
+    desc.tfidfTransformer shouldBe false
+    desc.target shouldBe null
+    desc.text shouldBe null
+  }
+
+  "SklearnTrainingGradientBoostingOpDesc.getOutputSchemas" should
+    "emit the model_name/model schema keyed by the declared output port" in {
+    val desc = new SklearnTrainingGradientBoostingOpDesc()
+    val outputSchema = desc.getOutputSchemas(Map.empty)(desc.operatorInfo.outputPorts.head.id)
+    outputSchema.getAttribute("model_name").getType shouldBe AttributeType.STRING
+    outputSchema.getAttribute("model").getType shouldBe AttributeType.BINARY
+  }
+
+  "SklearnTrainingGradientBoostingOpDesc.generatePythonCode" should "import the configured sklearn estimator" in {
+    val desc = new SklearnTrainingGradientBoostingOpDesc()
+    desc.target = "y"
+    val pythonCode = desc.generatePythonCode()
+    pythonCode should include("from sklearn.ensemble import GradientBoostingClassifier")
+    pythonCode should include("make_pipeline")
+    pythonCode should include("Training: Gradient Boosting")
+  }
+
+  "SklearnTrainingGradientBoostingOpDesc" should "round-trip its config fields through the polymorphic base" in {
+    val desc = new SklearnTrainingGradientBoostingOpDesc()
+    desc.target = "label"
+    desc.countVectorizer = true
+    val serialized = objectMapper.writeValueAsString(desc)
+    serialized should include("\"operatorType\":\"SklearnTrainingGradientBoosting\"")
+    val decoded = objectMapper.readValue(serialized, classOf[LogicalOp])
+    decoded shouldBe a[SklearnTrainingGradientBoostingOpDesc]
+    val typed = decoded.asInstanceOf[SklearnTrainingGradientBoostingOpDesc]
+    typed.target shouldBe "label"
+    typed.countVectorizer shouldBe true
+  }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingLinearRegressionOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingLinearRegressionOpDescSpec.scala
new file mode 100644
index 00000000000..7bd4a2af522
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingLinearRegressionOpDescSpec.scala
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.sklearn.training
+
+import org.apache.texera.amber.core.tuple.AttributeType
+import org.apache.texera.amber.operator.LogicalOp
+import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+
+class SklearnTrainingLinearRegressionOpDescSpec extends AnyFlatSpec with Matchers {
+  // Exercises the descriptor only: the generated Python is checked as text, not executed.
+  "SklearnTrainingLinearRegressionOpDesc.operatorInfo" should
+    "advertise the model name, Sklearn Training group, and the single training port" in {
+    val opInfo = new SklearnTrainingLinearRegressionOpDesc().operatorInfo
+    opInfo.userFriendlyName shouldBe "Training: Linear Regression"
+    opInfo.operatorDescription shouldBe "Sklearn Training: Linear Regression Operator"
+    opInfo.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_TRAINING_GROUP
+    opInfo.inputPorts.map(port => port.displayName) shouldBe List("training")
+    opInfo.outputPorts should have length 1
+    opInfo.outputPorts.head.blocking shouldBe true
+  }
+
+  "SklearnTrainingLinearRegressionOpDesc" should "default its config fields" in {
+    val desc = new SklearnTrainingLinearRegressionOpDesc()
+    desc.countVectorizer shouldBe false
+    desc.tfidfTransformer shouldBe false
+    desc.target shouldBe null
+    desc.text shouldBe null
+  }
+
+  "SklearnTrainingLinearRegressionOpDesc.getOutputSchemas" should
+    "emit the model_name/model schema keyed by the declared output port" in {
+    val desc = new SklearnTrainingLinearRegressionOpDesc()
+    val outputSchema = desc.getOutputSchemas(Map.empty)(desc.operatorInfo.outputPorts.head.id)
+    outputSchema.getAttribute("model_name").getType shouldBe AttributeType.STRING
+    outputSchema.getAttribute("model").getType shouldBe AttributeType.BINARY
+  }
+
+  "SklearnTrainingLinearRegressionOpDesc.generatePythonCode" should "import the configured sklearn estimator" in {
+    val desc = new SklearnTrainingLinearRegressionOpDesc()
+    desc.target = "y"
+    val pythonCode = desc.generatePythonCode()
+    pythonCode should include("from sklearn.linear_model import LinearRegression")
+    pythonCode should include("make_pipeline")
+    pythonCode should include("Training: Linear Regression")
+  }
+
+  "SklearnTrainingLinearRegressionOpDesc" should "round-trip its config fields through the polymorphic base" in {
+    val desc = new SklearnTrainingLinearRegressionOpDesc()
+    desc.target = "label"
+    desc.countVectorizer = true
+    val serialized = objectMapper.writeValueAsString(desc)
+    serialized should include("\"operatorType\":\"SklearnTrainingLinearRegression\"")
+    val decoded = objectMapper.readValue(serialized, classOf[LogicalOp])
+    decoded shouldBe a[SklearnTrainingLinearRegressionOpDesc]
+    val typed = decoded.asInstanceOf[SklearnTrainingLinearRegressionOpDesc]
+    typed.target shouldBe "label"
+    typed.countVectorizer shouldBe true
+  }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingLogisticRegressionCVOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingLogisticRegressionCVOpDescSpec.scala
new file mode 100644
index 00000000000..b0386a50111
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingLogisticRegressionCVOpDescSpec.scala
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.sklearn.training
+
+import org.apache.texera.amber.core.tuple.AttributeType
+import org.apache.texera.amber.operator.LogicalOp
+import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+
+class SklearnTrainingLogisticRegressionCVOpDescSpec extends AnyFlatSpec with Matchers {
+  // Exercises the descriptor only: the generated Python is checked as text, not executed.
+  "SklearnTrainingLogisticRegressionCVOpDesc.operatorInfo" should
+    "advertise the model name, Sklearn Training group, and the single training port" in {
+    val opInfo = new SklearnTrainingLogisticRegressionCVOpDesc().operatorInfo
+    opInfo.userFriendlyName shouldBe "Training: Logistic Regression Cross Validation"
+    opInfo.operatorDescription shouldBe "Sklearn Training: Logistic Regression Cross Validation Operator"
+    opInfo.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_TRAINING_GROUP
+    opInfo.inputPorts.map(port => port.displayName) shouldBe List("training")
+    opInfo.outputPorts should have length 1
+    opInfo.outputPorts.head.blocking shouldBe true
+  }
+
+  "SklearnTrainingLogisticRegressionCVOpDesc" should "default its config fields" in {
+    val desc = new SklearnTrainingLogisticRegressionCVOpDesc()
+    desc.countVectorizer shouldBe false
+    desc.tfidfTransformer shouldBe false
+    desc.target shouldBe null
+    desc.text shouldBe null
+  }
+
+  "SklearnTrainingLogisticRegressionCVOpDesc.getOutputSchemas" should
+    "emit the model_name/model schema keyed by the declared output port" in {
+    val desc = new SklearnTrainingLogisticRegressionCVOpDesc()
+    val outputSchema = desc.getOutputSchemas(Map.empty)(desc.operatorInfo.outputPorts.head.id)
+    outputSchema.getAttribute("model_name").getType shouldBe AttributeType.STRING
+    outputSchema.getAttribute("model").getType shouldBe AttributeType.BINARY
+  }
+
+  "SklearnTrainingLogisticRegressionCVOpDesc.generatePythonCode" should "import the configured sklearn estimator" in {
+    val desc = new SklearnTrainingLogisticRegressionCVOpDesc()
+    desc.target = "y"
+    val pythonCode = desc.generatePythonCode()
+    pythonCode should include("from sklearn.linear_model import LogisticRegressionCV")
+    pythonCode should include("make_pipeline")
+    pythonCode should include("Training: Logistic Regression Cross Validation")
+  }
+
+  "SklearnTrainingLogisticRegressionCVOpDesc" should "round-trip its config fields through the polymorphic base" in {
+    val desc = new SklearnTrainingLogisticRegressionCVOpDesc()
+    desc.target = "label"
+    desc.countVectorizer = true
+    val serialized = objectMapper.writeValueAsString(desc)
+    serialized should include("\"operatorType\":\"SklearnTrainingLogisticRegressionCV\"")
+    val decoded = objectMapper.readValue(serialized, classOf[LogicalOp])
+    decoded shouldBe a[SklearnTrainingLogisticRegressionCVOpDesc]
+    val typed = decoded.asInstanceOf[SklearnTrainingLogisticRegressionCVOpDesc]
+    typed.target shouldBe "label"
+    typed.countVectorizer shouldBe true
+  }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingLogisticRegressionOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingLogisticRegressionOpDescSpec.scala
new file mode 100644
index 00000000000..e5b04f95a36
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingLogisticRegressionOpDescSpec.scala
@@ -0,0
+1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.sklearn.training
+
+import org.apache.texera.amber.core.tuple.AttributeType
+import org.apache.texera.amber.operator.LogicalOp
+import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+
+class SklearnTrainingLogisticRegressionOpDescSpec extends AnyFlatSpec with Matchers {
+  // Exercises the descriptor only: the generated Python is checked as text, not executed.
+  "SklearnTrainingLogisticRegressionOpDesc.operatorInfo" should
+    "advertise the model name, Sklearn Training group, and the single training port" in {
+    val opInfo = new SklearnTrainingLogisticRegressionOpDesc().operatorInfo
+    opInfo.userFriendlyName shouldBe "Training: Logistic Regression"
+    opInfo.operatorDescription shouldBe "Sklearn Training: Logistic Regression Operator"
+    opInfo.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_TRAINING_GROUP
+    opInfo.inputPorts.map(port => port.displayName) shouldBe List("training")
+    opInfo.outputPorts should have length 1
+    opInfo.outputPorts.head.blocking shouldBe true
+  }
+
+  "SklearnTrainingLogisticRegressionOpDesc" should "default its config fields" in {
+    val desc = new SklearnTrainingLogisticRegressionOpDesc()
+    desc.countVectorizer shouldBe false
+    desc.tfidfTransformer shouldBe false
+    desc.target shouldBe null
+    desc.text shouldBe null
+  }
+
+  "SklearnTrainingLogisticRegressionOpDesc.getOutputSchemas" should
+    "emit the model_name/model schema keyed by the declared output port" in {
+    val desc = new SklearnTrainingLogisticRegressionOpDesc()
+    val outputSchema = desc.getOutputSchemas(Map.empty)(desc.operatorInfo.outputPorts.head.id)
+    outputSchema.getAttribute("model_name").getType shouldBe AttributeType.STRING
+    outputSchema.getAttribute("model").getType shouldBe AttributeType.BINARY
+  }
+
+  "SklearnTrainingLogisticRegressionOpDesc.generatePythonCode" should "import the configured sklearn estimator" in {
+    val desc = new SklearnTrainingLogisticRegressionOpDesc()
+    desc.target = "y"
+    val pythonCode = desc.generatePythonCode()
+    pythonCode should include("from sklearn.linear_model import LogisticRegression")
+    pythonCode should include("make_pipeline")
+    pythonCode should include("Training: Logistic Regression")
+  }
+
+  "SklearnTrainingLogisticRegressionOpDesc" should "round-trip its config fields through the polymorphic base" in {
+    val desc = new SklearnTrainingLogisticRegressionOpDesc()
+    desc.target = "label"
+    desc.countVectorizer = true
+    val serialized = objectMapper.writeValueAsString(desc)
+    serialized should include("\"operatorType\":\"SklearnTrainingLogisticRegression\"")
+    val decoded = objectMapper.readValue(serialized, classOf[LogicalOp])
+    decoded shouldBe a[SklearnTrainingLogisticRegressionOpDesc]
+    val typed = decoded.asInstanceOf[SklearnTrainingLogisticRegressionOpDesc]
+    typed.target shouldBe "label"
+    typed.countVectorizer shouldBe true
+  }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingPassiveAggressiveOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingPassiveAggressiveOpDescSpec.scala
new file mode 100644
index 00000000000..519a85c4164
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingPassiveAggressiveOpDescSpec.scala
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.sklearn.training
+
+import org.apache.texera.amber.core.tuple.AttributeType
+import org.apache.texera.amber.operator.LogicalOp
+import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+
+class SklearnTrainingPassiveAggressiveOpDescSpec extends AnyFlatSpec with Matchers {
+  // Exercises the descriptor only: the generated Python is checked as text, not executed.
+  "SklearnTrainingPassiveAggressiveOpDesc.operatorInfo" should
+    "advertise the model name, Sklearn Training group, and the single training port" in {
+    val opInfo = new SklearnTrainingPassiveAggressiveOpDesc().operatorInfo
+    opInfo.userFriendlyName shouldBe "Training: Passive Aggressive"
+    opInfo.operatorDescription shouldBe "Sklearn Training: Passive Aggressive Operator"
+    opInfo.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_TRAINING_GROUP
+    opInfo.inputPorts.map(port => port.displayName) shouldBe List("training")
+    opInfo.outputPorts should have length 1
+    opInfo.outputPorts.head.blocking shouldBe true
+  }
+
+  "SklearnTrainingPassiveAggressiveOpDesc" should "default its config fields" in {
+    val desc = new SklearnTrainingPassiveAggressiveOpDesc()
+    desc.countVectorizer shouldBe false
+    desc.tfidfTransformer shouldBe false
+    desc.target shouldBe null
+    desc.text shouldBe null
+  }
+
+  "SklearnTrainingPassiveAggressiveOpDesc.getOutputSchemas" should
+    "emit the model_name/model schema keyed by the declared output port" in {
+    val desc = new SklearnTrainingPassiveAggressiveOpDesc()
+    val outputSchema = desc.getOutputSchemas(Map.empty)(desc.operatorInfo.outputPorts.head.id)
+    outputSchema.getAttribute("model_name").getType shouldBe AttributeType.STRING
+    outputSchema.getAttribute("model").getType shouldBe AttributeType.BINARY
+  }
+
+  "SklearnTrainingPassiveAggressiveOpDesc.generatePythonCode" should "import the configured sklearn estimator" in {
+    val desc = new SklearnTrainingPassiveAggressiveOpDesc()
+    desc.target = "y"
+    val pythonCode = desc.generatePythonCode()
+    pythonCode should include("from sklearn.linear_model import PassiveAggressiveClassifier")
+    pythonCode should include("make_pipeline")
+    pythonCode should include("Training: Passive Aggressive")
+  }
+
+  "SklearnTrainingPassiveAggressiveOpDesc" should "round-trip its config fields through the polymorphic base" in {
+    val desc = new SklearnTrainingPassiveAggressiveOpDesc()
+    desc.target = "label"
+    desc.countVectorizer = true
+    val serialized = objectMapper.writeValueAsString(desc)
+    serialized should include("\"operatorType\":\"SklearnTrainingPassiveAggressive\"")
+    val decoded = objectMapper.readValue(serialized, classOf[LogicalOp])
+    decoded shouldBe a[SklearnTrainingPassiveAggressiveOpDesc]
+    val typed = decoded.asInstanceOf[SklearnTrainingPassiveAggressiveOpDesc]
+    typed.target shouldBe "label"
+    typed.countVectorizer shouldBe true
+  }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingPerceptronOpDescSpec.scala
b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingPerceptronOpDescSpec.scala new file mode 100644 index 00000000000..2ce66d8fdf9 --- /dev/null +++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingPerceptronOpDescSpec.scala @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.apache.texera.amber.operator.sklearn.training
+
+import org.apache.texera.amber.core.tuple.AttributeType
+import org.apache.texera.amber.operator.LogicalOp
+import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+
+class SklearnTrainingPerceptronOpDescSpec extends AnyFlatSpec with Matchers {
+  // Descriptor-level checks: operator metadata, defaults, output schema, Python codegen, JSON.
+  "SklearnTrainingPerceptronOpDesc.operatorInfo" should
+    "advertise the model name, Sklearn Training group, and the single training port" in {
+    val opInfo = new SklearnTrainingPerceptronOpDesc().operatorInfo
+    opInfo.userFriendlyName shouldBe "Training: Linear Perceptron"
+    opInfo.operatorDescription shouldBe "Sklearn Training: Linear Perceptron Operator"
+    opInfo.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_TRAINING_GROUP
+    opInfo.inputPorts.map(port => port.displayName) shouldBe List("training")
+    opInfo.outputPorts should have length 1
+    opInfo.outputPorts.head.blocking shouldBe true
+  }
+  // A freshly constructed descriptor has both vectorizer flags false and target/text unset (null).
+  "SklearnTrainingPerceptronOpDesc" should "default its config fields" in {
+    val fresh = new SklearnTrainingPerceptronOpDesc
+    fresh.countVectorizer shouldBe false
+    fresh.tfidfTransformer shouldBe false
+    fresh.target shouldBe null
+    fresh.text shouldBe null
+  }
+  // With an empty input-schema map, the first declared output port maps to model_name/model.
+  "SklearnTrainingPerceptronOpDesc.getOutputSchemas" should
+    "emit the model_name/model schema keyed by the declared output port" in {
+    val desc = new SklearnTrainingPerceptronOpDesc
+    val modelSchema = desc.getOutputSchemas(Map.empty)(desc.operatorInfo.outputPorts.head.id)
+    modelSchema.getAttribute("model_name").getType shouldBe AttributeType.STRING
+    modelSchema.getAttribute("model").getType shouldBe AttributeType.BINARY
+  }
+  // With only target set, the generated Python contains the import, make_pipeline and the label.
+  "SklearnTrainingPerceptronOpDesc.generatePythonCode" should "import the configured sklearn estimator" in {
+    val desc = new SklearnTrainingPerceptronOpDesc
+    desc.target = "y"
+    val pythonSource = desc.generatePythonCode()
+    pythonSource should include("from sklearn.linear_model import Perceptron")
+    pythonSource should include("make_pipeline")
+    pythonSource should include("Training: Linear Perceptron")
+  }
+  // JSON carries the operatorType tag; reading it back as LogicalOp restores the concrete class.
+  "SklearnTrainingPerceptronOpDesc" should "round-trip its config fields through the polymorphic base" in {
+    val source = new SklearnTrainingPerceptronOpDesc
+    source.target = "label"
+    source.countVectorizer = true
+    val serialized = objectMapper.writeValueAsString(source)
+    serialized should include("\"operatorType\":\"SklearnTrainingPerceptron\"")
+    val decoded = objectMapper.readValue(serialized, classOf[LogicalOp])
+    decoded shouldBe a[SklearnTrainingPerceptronOpDesc]
+    val typed = decoded.asInstanceOf[SklearnTrainingPerceptronOpDesc]
+    typed.target shouldBe "label"
+    typed.countVectorizer shouldBe true
+  }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingSDGOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingSDGOpDescSpec.scala
new file mode 100644
index 00000000000..26388758431
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingSDGOpDescSpec.scala
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.sklearn.training
+
+import org.apache.texera.amber.core.tuple.AttributeType
+import org.apache.texera.amber.operator.LogicalOp
+import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+
+class SklearnTrainingSDGOpDescSpec extends AnyFlatSpec with Matchers {
+  // Descriptor-level checks: operator metadata, defaults, output schema, Python codegen, JSON.
+  "SklearnTrainingSDGOpDesc.operatorInfo" should
+    "advertise the model name, Sklearn Training group, and the single training port" in {
+    val opInfo = new SklearnTrainingSDGOpDesc().operatorInfo
+    opInfo.userFriendlyName shouldBe "Training: Stochastic Gradient Descent"
+    opInfo.operatorDescription shouldBe "Sklearn Training: Stochastic Gradient Descent Operator"
+    opInfo.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_TRAINING_GROUP
+    opInfo.inputPorts.map(port => port.displayName) shouldBe List("training")
+    opInfo.outputPorts should have length 1
+    opInfo.outputPorts.head.blocking shouldBe true
+  }
+  // A freshly constructed descriptor has both vectorizer flags false and target/text unset (null).
+  "SklearnTrainingSDGOpDesc" should "default its config fields" in {
+    val fresh = new SklearnTrainingSDGOpDesc
+    fresh.countVectorizer shouldBe false
+    fresh.tfidfTransformer shouldBe false
+    fresh.target shouldBe null
+    fresh.text shouldBe null
+  }
+  // With an empty input-schema map, the first declared output port maps to model_name/model.
+  "SklearnTrainingSDGOpDesc.getOutputSchemas" should
+    "emit the model_name/model schema keyed by the declared output port" in {
+    val desc = new SklearnTrainingSDGOpDesc
+    val modelSchema = desc.getOutputSchemas(Map.empty)(desc.operatorInfo.outputPorts.head.id)
+    modelSchema.getAttribute("model_name").getType shouldBe AttributeType.STRING
+    modelSchema.getAttribute("model").getType shouldBe AttributeType.BINARY
+  }
+  // With only target set, the generated Python contains the import, make_pipeline and the label.
+  "SklearnTrainingSDGOpDesc.generatePythonCode" should "import the configured sklearn estimator" in {
+    val desc = new SklearnTrainingSDGOpDesc
+    desc.target = "y"
+    val pythonSource = desc.generatePythonCode()
+    pythonSource should include("from sklearn.linear_model import SGDClassifier")
+    pythonSource should include("make_pipeline")
+    pythonSource should include("Training: Stochastic Gradient Descent")
+  }
+  // JSON carries the operatorType tag; reading it back as LogicalOp restores the concrete class.
+  "SklearnTrainingSDGOpDesc" should "round-trip its config fields through the polymorphic base" in {
+    val source = new SklearnTrainingSDGOpDesc
+    source.target = "label"
+    source.countVectorizer = true
+    val serialized = objectMapper.writeValueAsString(source)
+    serialized should include("\"operatorType\":\"SklearnTrainingSDG\"")
+    val decoded = objectMapper.readValue(serialized, classOf[LogicalOp])
+    decoded shouldBe a[SklearnTrainingSDGOpDesc]
+    val typed = decoded.asInstanceOf[SklearnTrainingSDGOpDesc]
+    typed.target shouldBe "label"
+    typed.countVectorizer shouldBe true
+  }
+}