From 5f6e1f85e605560575aaafceb4401b54dac15c60 Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Fri, 26 Jun 2026 01:52:01 -0700 Subject: [PATCH] test(workflow-operator): add unit test coverage for Sklearn training NB & tree descriptors --- ...rainingBernoulliNaiveBayesOpDescSpec.scala | 79 +++++++++++++++++++ ...ainingComplementNaiveBayesOpDescSpec.scala | 79 +++++++++++++++++++ ...klearnTrainingDecisionTreeOpDescSpec.scala | 79 +++++++++++++++++++ .../SklearnTrainingExtraTreeOpDescSpec.scala | 79 +++++++++++++++++++ .../SklearnTrainingExtraTreesOpDescSpec.scala | 79 +++++++++++++++++++ ...TrainingGaussianNaiveBayesOpDescSpec.scala | 79 +++++++++++++++++++ ...iningMultinomialNaiveBayesOpDescSpec.scala | 79 +++++++++++++++++++ ...klearnTrainingRandomForestOpDescSpec.scala | 79 +++++++++++++++++++ 8 files changed, 632 insertions(+) create mode 100644 common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingBernoulliNaiveBayesOpDescSpec.scala create mode 100644 common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingComplementNaiveBayesOpDescSpec.scala create mode 100644 common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingDecisionTreeOpDescSpec.scala create mode 100644 common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingExtraTreeOpDescSpec.scala create mode 100644 common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingExtraTreesOpDescSpec.scala create mode 100644 common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingGaussianNaiveBayesOpDescSpec.scala create mode 100644 common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingMultinomialNaiveBayesOpDescSpec.scala create mode 100644 
common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingRandomForestOpDescSpec.scala diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingBernoulliNaiveBayesOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingBernoulliNaiveBayesOpDescSpec.scala new file mode 100644 index 00000000000..7bfe9c0905b --- /dev/null +++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingBernoulliNaiveBayesOpDescSpec.scala @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.apache.texera.amber.operator.sklearn.training
+
+import org.apache.texera.amber.core.tuple.AttributeType
+import org.apache.texera.amber.operator.LogicalOp
+import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+// Covers operatorInfo, field defaults, output schema, Python codegen, and the JSON round-trip.
+class SklearnTrainingBernoulliNaiveBayesOpDescSpec extends AnyFlatSpec with Matchers {
+  // Checks the display strings, group, lone "training" input, and single blocking output port.
+  "SklearnTrainingBernoulliNaiveBayesOpDesc.operatorInfo" should
+    "advertise the model name, Sklearn Training group, and the single training port" in {
+    val info = (new SklearnTrainingBernoulliNaiveBayesOpDesc).operatorInfo
+    info.userFriendlyName shouldBe "Training: Bernoulli Naive Bayes"
+    info.operatorDescription shouldBe "Sklearn Training: Bernoulli Naive Bayes Operator"
+    info.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_TRAINING_GROUP
+    info.inputPorts.map(_.displayName) shouldBe List("training")
+    info.outputPorts should have length 1
+    info.outputPorts.head.blocking shouldBe true
+  }
+  // A new descriptor has both vectorizer flags false and target/text still null.
+  "SklearnTrainingBernoulliNaiveBayesOpDesc" should "default its config fields" in {
+    val d = new SklearnTrainingBernoulliNaiveBayesOpDesc
+    d.countVectorizer shouldBe false
+    d.tfidfTransformer shouldBe false
+    d.target shouldBe null
+    d.text shouldBe null
+  }
+  // Looks up the result by the first output port's id: STRING model_name, BINARY model.
+  "SklearnTrainingBernoulliNaiveBayesOpDesc.getOutputSchemas" should
+    "emit the model_name/model schema keyed by the declared output port" in {
+    val d = new SklearnTrainingBernoulliNaiveBayesOpDesc
+    val schema = d.getOutputSchemas(Map.empty)(d.operatorInfo.outputPorts.head.id)
+    schema.getAttribute("model_name").getType shouldBe AttributeType.STRING
+    schema.getAttribute("model").getType shouldBe AttributeType.BINARY
+  }
+  // With only target set, expects the estimator import, make_pipeline, and the friendly name.
+  "SklearnTrainingBernoulliNaiveBayesOpDesc.generatePythonCode" should "import the configured sklearn estimator" in {
+    val d = new SklearnTrainingBernoulliNaiveBayesOpDesc
+    d.target = "y"
+    val code = d.generatePythonCode()
+    code should include("from sklearn.naive_bayes import BernoulliNB")
+    code should include("make_pipeline")
+    code should include("Training: Bernoulli Naive Bayes")
+  }
+  // Serializes, checks the operatorType tag, then deserializes as LogicalOp and compares fields.
+  "SklearnTrainingBernoulliNaiveBayesOpDesc" should "round-trip its config fields through the polymorphic base" in {
+    val d = new SklearnTrainingBernoulliNaiveBayesOpDesc
+    d.target = "label"
+    d.countVectorizer = true
+    val json = objectMapper.writeValueAsString(d)
+    json should include("\"operatorType\":\"SklearnTrainingBernoulliNaiveBayes\"")
+    val restored = objectMapper.readValue(json, classOf[LogicalOp])
+    restored shouldBe a[SklearnTrainingBernoulliNaiveBayesOpDesc]
+    val r = restored.asInstanceOf[SklearnTrainingBernoulliNaiveBayesOpDesc]
+    r.target shouldBe "label"
+    r.countVectorizer shouldBe true
+  }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingComplementNaiveBayesOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingComplementNaiveBayesOpDescSpec.scala
new file mode 100644
index 00000000000..1c30b439b26
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingComplementNaiveBayesOpDescSpec.scala
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.sklearn.training
+
+import org.apache.texera.amber.core.tuple.AttributeType
+import org.apache.texera.amber.operator.LogicalOp
+import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+// Covers operatorInfo, field defaults, output schema, Python codegen, and the JSON round-trip.
+class SklearnTrainingComplementNaiveBayesOpDescSpec extends AnyFlatSpec with Matchers {
+  // Checks the display strings, group, lone "training" input, and single blocking output port.
+  "SklearnTrainingComplementNaiveBayesOpDesc.operatorInfo" should
+    "advertise the model name, Sklearn Training group, and the single training port" in {
+    val info = (new SklearnTrainingComplementNaiveBayesOpDesc).operatorInfo
+    info.userFriendlyName shouldBe "Training: Complement Naive Bayes"
+    info.operatorDescription shouldBe "Sklearn Training: Complement Naive Bayes Operator"
+    info.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_TRAINING_GROUP
+    info.inputPorts.map(_.displayName) shouldBe List("training")
+    info.outputPorts should have length 1
+    info.outputPorts.head.blocking shouldBe true
+  }
+  // A new descriptor has both vectorizer flags false and target/text still null.
+  "SklearnTrainingComplementNaiveBayesOpDesc" should "default its config fields" in {
+    val d = new SklearnTrainingComplementNaiveBayesOpDesc
+    d.countVectorizer shouldBe false
+    d.tfidfTransformer shouldBe false
+    d.target shouldBe null
+    d.text shouldBe null
+  }
+  // Looks up the result by the first output port's id: STRING model_name, BINARY model.
+  "SklearnTrainingComplementNaiveBayesOpDesc.getOutputSchemas" should
+    "emit the model_name/model schema keyed by the declared output port" in {
+    val d = new SklearnTrainingComplementNaiveBayesOpDesc
+    val schema = d.getOutputSchemas(Map.empty)(d.operatorInfo.outputPorts.head.id)
+    schema.getAttribute("model_name").getType shouldBe AttributeType.STRING
+    schema.getAttribute("model").getType shouldBe AttributeType.BINARY
+  }
+  // With only target set, expects the estimator import, make_pipeline, and the friendly name.
+  "SklearnTrainingComplementNaiveBayesOpDesc.generatePythonCode" should "import the configured sklearn estimator" in {
+    val d = new SklearnTrainingComplementNaiveBayesOpDesc
+    d.target = "y"
+    val code = d.generatePythonCode()
+    code should include("from sklearn.naive_bayes import ComplementNB")
+    code should include("make_pipeline")
+    code should include("Training: Complement Naive Bayes")
+  }
+  // Serializes, checks the operatorType tag, then deserializes as LogicalOp and compares fields.
+  "SklearnTrainingComplementNaiveBayesOpDesc" should "round-trip its config fields through the polymorphic base" in {
+    val d = new SklearnTrainingComplementNaiveBayesOpDesc
+    d.target = "label"
+    d.countVectorizer = true
+    val json = objectMapper.writeValueAsString(d)
+    json should include("\"operatorType\":\"SklearnTrainingComplementNaiveBayes\"")
+    val restored = objectMapper.readValue(json, classOf[LogicalOp])
+    restored shouldBe a[SklearnTrainingComplementNaiveBayesOpDesc]
+    val r = restored.asInstanceOf[SklearnTrainingComplementNaiveBayesOpDesc]
+    r.target shouldBe "label"
+    r.countVectorizer shouldBe true
+  }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingDecisionTreeOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingDecisionTreeOpDescSpec.scala
new file mode 100644
index 00000000000..33031a81d7a
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingDecisionTreeOpDescSpec.scala
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.sklearn.training
+
+import org.apache.texera.amber.core.tuple.AttributeType
+import org.apache.texera.amber.operator.LogicalOp
+import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+// Covers operatorInfo, field defaults, output schema, Python codegen, and the JSON round-trip.
+class SklearnTrainingDecisionTreeOpDescSpec extends AnyFlatSpec with Matchers {
+  // Checks the display strings, group, lone "training" input, and single blocking output port.
+  "SklearnTrainingDecisionTreeOpDesc.operatorInfo" should
+    "advertise the model name, Sklearn Training group, and the single training port" in {
+    val info = (new SklearnTrainingDecisionTreeOpDesc).operatorInfo
+    info.userFriendlyName shouldBe "Training: Decision Tree"
+    info.operatorDescription shouldBe "Sklearn Training: Decision Tree Operator"
+    info.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_TRAINING_GROUP
+    info.inputPorts.map(_.displayName) shouldBe List("training")
+    info.outputPorts should have length 1
+    info.outputPorts.head.blocking shouldBe true
+  }
+  // A new descriptor has both vectorizer flags false and target/text still null.
+  "SklearnTrainingDecisionTreeOpDesc" should "default its config fields" in {
+    val d = new SklearnTrainingDecisionTreeOpDesc
+    d.countVectorizer shouldBe false
+    d.tfidfTransformer shouldBe false
+    d.target shouldBe null
+    d.text shouldBe null
+  }
+  // Looks up the result by the first output port's id: STRING model_name, BINARY model.
+  "SklearnTrainingDecisionTreeOpDesc.getOutputSchemas" should
+    "emit the model_name/model schema keyed by the declared output port" in {
+    val d = new SklearnTrainingDecisionTreeOpDesc
+    val schema = d.getOutputSchemas(Map.empty)(d.operatorInfo.outputPorts.head.id)
+    schema.getAttribute("model_name").getType shouldBe AttributeType.STRING
+    schema.getAttribute("model").getType shouldBe AttributeType.BINARY
+  }
+  // With only target set, expects the estimator import, make_pipeline, and the friendly name.
+  "SklearnTrainingDecisionTreeOpDesc.generatePythonCode" should "import the configured sklearn estimator" in {
+    val d = new SklearnTrainingDecisionTreeOpDesc
+    d.target = "y"
+    val code = d.generatePythonCode()
+    code should include("from sklearn.tree import DecisionTreeClassifier")
+    code should include("make_pipeline")
+    code should include("Training: Decision Tree")
+  }
+  // Serializes, checks the operatorType tag, then deserializes as LogicalOp and compares fields.
+  "SklearnTrainingDecisionTreeOpDesc" should "round-trip its config fields through the polymorphic base" in {
+    val d = new SklearnTrainingDecisionTreeOpDesc
+    d.target = "label"
+    d.countVectorizer = true
+    val json = objectMapper.writeValueAsString(d)
+    json should include("\"operatorType\":\"SklearnTrainingDecisionTree\"")
+    val restored = objectMapper.readValue(json, classOf[LogicalOp])
+    restored shouldBe a[SklearnTrainingDecisionTreeOpDesc]
+    val r = restored.asInstanceOf[SklearnTrainingDecisionTreeOpDesc]
+    r.target shouldBe "label"
+    r.countVectorizer shouldBe true
+  }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingExtraTreeOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingExtraTreeOpDescSpec.scala
new file mode 100644
index 00000000000..f43606ed5ab
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingExtraTreeOpDescSpec.scala
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor
license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.sklearn.training
+
+import org.apache.texera.amber.core.tuple.AttributeType
+import org.apache.texera.amber.operator.LogicalOp
+import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+// Covers operatorInfo, field defaults, output schema, Python codegen, and the JSON round-trip.
+class SklearnTrainingExtraTreeOpDescSpec extends AnyFlatSpec with Matchers {
+  // Checks the display strings, group, lone "training" input, and single blocking output port.
+  "SklearnTrainingExtraTreeOpDesc.operatorInfo" should
+    "advertise the model name, Sklearn Training group, and the single training port" in {
+    val info = (new SklearnTrainingExtraTreeOpDesc).operatorInfo
+    info.userFriendlyName shouldBe "Training: Extra Tree"
+    info.operatorDescription shouldBe "Sklearn Training: Extra Tree Operator"
+    info.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_TRAINING_GROUP
+    info.inputPorts.map(_.displayName) shouldBe List("training")
+    info.outputPorts should have length 1
+    info.outputPorts.head.blocking shouldBe true
+  }
+  // A new descriptor has both vectorizer flags false and target/text still null.
+  "SklearnTrainingExtraTreeOpDesc" should "default its config fields" in {
+    val d = new SklearnTrainingExtraTreeOpDesc
+    d.countVectorizer shouldBe false
+    d.tfidfTransformer shouldBe false
+    d.target shouldBe null
+    d.text shouldBe null
+  }
+  // Looks up the result by the first output port's id: STRING model_name, BINARY model.
+  "SklearnTrainingExtraTreeOpDesc.getOutputSchemas" should
+    "emit the model_name/model schema keyed by the declared output port" in {
+    val d = new SklearnTrainingExtraTreeOpDesc
+    val schema = d.getOutputSchemas(Map.empty)(d.operatorInfo.outputPorts.head.id)
+    schema.getAttribute("model_name").getType shouldBe AttributeType.STRING
+    schema.getAttribute("model").getType shouldBe AttributeType.BINARY
+  }
+  // With only target set, expects the estimator import, make_pipeline, and the friendly name.
+  "SklearnTrainingExtraTreeOpDesc.generatePythonCode" should "import the configured sklearn estimator" in {
+    val d = new SklearnTrainingExtraTreeOpDesc
+    d.target = "y"
+    val code = d.generatePythonCode()
+    code should include("from sklearn.tree import ExtraTreeClassifier")
+    code should include("make_pipeline")
+    code should include("Training: Extra Tree")
+  }
+  // Serializes, checks the operatorType tag, then deserializes as LogicalOp and compares fields.
+  "SklearnTrainingExtraTreeOpDesc" should "round-trip its config fields through the polymorphic base" in {
+    val d = new SklearnTrainingExtraTreeOpDesc
+    d.target = "label"
+    d.countVectorizer = true
+    val json = objectMapper.writeValueAsString(d)
+    json should include("\"operatorType\":\"SklearnTrainingExtraTree\"")
+    val restored = objectMapper.readValue(json, classOf[LogicalOp])
+    restored shouldBe a[SklearnTrainingExtraTreeOpDesc]
+    val r = restored.asInstanceOf[SklearnTrainingExtraTreeOpDesc]
+    r.target shouldBe "label"
+    r.countVectorizer shouldBe true
+  }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingExtraTreesOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingExtraTreesOpDescSpec.scala
new file mode 100644
index 00000000000..e404a05c0de
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingExtraTreesOpDescSpec.scala
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.sklearn.training
+
+import org.apache.texera.amber.core.tuple.AttributeType
+import org.apache.texera.amber.operator.LogicalOp
+import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+// Covers operatorInfo, field defaults, output schema, Python codegen, and the JSON round-trip.
+class SklearnTrainingExtraTreesOpDescSpec extends AnyFlatSpec with Matchers {
+  // Checks the display strings, group, lone "training" input, and single blocking output port.
+  "SklearnTrainingExtraTreesOpDesc.operatorInfo" should
+    "advertise the model name, Sklearn Training group, and the single training port" in {
+    val info = (new SklearnTrainingExtraTreesOpDesc).operatorInfo
+    info.userFriendlyName shouldBe "Training: Extra Trees"
+    info.operatorDescription shouldBe "Sklearn Training: Extra Trees Operator"
+    info.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_TRAINING_GROUP
+    info.inputPorts.map(_.displayName) shouldBe List("training")
+    info.outputPorts should have length 1
+    info.outputPorts.head.blocking shouldBe true
+  }
+  // A new descriptor has both vectorizer flags false and target/text still null.
+  "SklearnTrainingExtraTreesOpDesc" should "default its config fields" in {
+    val d = new SklearnTrainingExtraTreesOpDesc
+    d.countVectorizer shouldBe false
+    d.tfidfTransformer shouldBe false
+    d.target shouldBe null
+    d.text shouldBe null
+  }
+  // Looks up the result by the first output port's id: STRING model_name, BINARY model.
+  "SklearnTrainingExtraTreesOpDesc.getOutputSchemas" should
+    "emit the model_name/model schema keyed by the declared output port" in {
+    val d = new SklearnTrainingExtraTreesOpDesc
+    val schema = d.getOutputSchemas(Map.empty)(d.operatorInfo.outputPorts.head.id)
+    schema.getAttribute("model_name").getType shouldBe AttributeType.STRING
+    schema.getAttribute("model").getType shouldBe AttributeType.BINARY
+  }
+  // With only target set, expects the estimator import, make_pipeline, and the friendly name.
+  "SklearnTrainingExtraTreesOpDesc.generatePythonCode" should "import the configured sklearn estimator" in {
+    val d = new SklearnTrainingExtraTreesOpDesc
+    d.target = "y"
+    val code = d.generatePythonCode()
+    code should include("from sklearn.ensemble import ExtraTreesClassifier")
+    code should include("make_pipeline")
+    code should include("Training: Extra Trees")
+  }
+  // Serializes, checks the operatorType tag, then deserializes as LogicalOp and compares fields.
+  "SklearnTrainingExtraTreesOpDesc" should "round-trip its config fields through the polymorphic base" in {
+    val d = new SklearnTrainingExtraTreesOpDesc
+    d.target = "label"
+    d.countVectorizer = true
+    val json = objectMapper.writeValueAsString(d)
+    json should include("\"operatorType\":\"SklearnTrainingExtraTrees\"")
+    val restored = objectMapper.readValue(json, classOf[LogicalOp])
+    restored shouldBe a[SklearnTrainingExtraTreesOpDesc]
+    val r = restored.asInstanceOf[SklearnTrainingExtraTreesOpDesc]
+    r.target shouldBe "label"
+    r.countVectorizer shouldBe true
+  }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingGaussianNaiveBayesOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingGaussianNaiveBayesOpDescSpec.scala
new file mode 100644
index 00000000000..7d193883ccf
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingGaussianNaiveBayesOpDescSpec.scala
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license
agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.sklearn.training
+
+import org.apache.texera.amber.core.tuple.AttributeType
+import org.apache.texera.amber.operator.LogicalOp
+import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+// Covers operatorInfo, field defaults, output schema, Python codegen, and the JSON round-trip.
+class SklearnTrainingGaussianNaiveBayesOpDescSpec extends AnyFlatSpec with Matchers {
+  // Checks the display strings, group, lone "training" input, and single blocking output port.
+  "SklearnTrainingGaussianNaiveBayesOpDesc.operatorInfo" should
+    "advertise the model name, Sklearn Training group, and the single training port" in {
+    val info = (new SklearnTrainingGaussianNaiveBayesOpDesc).operatorInfo
+    info.userFriendlyName shouldBe "Training: Gaussian Naive Bayes"
+    info.operatorDescription shouldBe "Sklearn Training: Gaussian Naive Bayes Operator"
+    info.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_TRAINING_GROUP
+    info.inputPorts.map(_.displayName) shouldBe List("training")
+    info.outputPorts should have length 1
+    info.outputPorts.head.blocking shouldBe true
+  }
+  // A new descriptor has both vectorizer flags false and target/text still null.
+  "SklearnTrainingGaussianNaiveBayesOpDesc" should "default its config fields" in {
+    val d = new SklearnTrainingGaussianNaiveBayesOpDesc
+    d.countVectorizer shouldBe false
+    d.tfidfTransformer shouldBe false
+    d.target shouldBe null
+    d.text shouldBe null
+  }
+  // Looks up the result by the first output port's id: STRING model_name, BINARY model.
+  "SklearnTrainingGaussianNaiveBayesOpDesc.getOutputSchemas" should
+    "emit the model_name/model schema keyed by the declared output port" in {
+    val d = new SklearnTrainingGaussianNaiveBayesOpDesc
+    val schema = d.getOutputSchemas(Map.empty)(d.operatorInfo.outputPorts.head.id)
+    schema.getAttribute("model_name").getType shouldBe AttributeType.STRING
+    schema.getAttribute("model").getType shouldBe AttributeType.BINARY
+  }
+  // With only target set, expects the estimator import, make_pipeline, and the friendly name.
+  "SklearnTrainingGaussianNaiveBayesOpDesc.generatePythonCode" should "import the configured sklearn estimator" in {
+    val d = new SklearnTrainingGaussianNaiveBayesOpDesc
+    d.target = "y"
+    val code = d.generatePythonCode()
+    code should include("from sklearn.naive_bayes import GaussianNB")
+    code should include("make_pipeline")
+    code should include("Training: Gaussian Naive Bayes")
+  }
+  // Serializes, checks the operatorType tag, then deserializes as LogicalOp and compares fields.
+  "SklearnTrainingGaussianNaiveBayesOpDesc" should "round-trip its config fields through the polymorphic base" in {
+    val d = new SklearnTrainingGaussianNaiveBayesOpDesc
+    d.target = "label"
+    d.countVectorizer = true
+    val json = objectMapper.writeValueAsString(d)
+    json should include("\"operatorType\":\"SklearnTrainingGaussianNaiveBayes\"")
+    val restored = objectMapper.readValue(json, classOf[LogicalOp])
+    restored shouldBe a[SklearnTrainingGaussianNaiveBayesOpDesc]
+    val r = restored.asInstanceOf[SklearnTrainingGaussianNaiveBayesOpDesc]
+    r.target shouldBe "label"
+    r.countVectorizer shouldBe true
+  }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingMultinomialNaiveBayesOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingMultinomialNaiveBayesOpDescSpec.scala
new file mode 100644
index 00000000000..6e1bda2cf12
--- /dev/null
+++
b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingMultinomialNaiveBayesOpDescSpec.scala
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.sklearn.training
+
+import org.apache.texera.amber.core.tuple.AttributeType
+import org.apache.texera.amber.operator.LogicalOp
+import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+// Covers operatorInfo, field defaults, output schema, Python codegen, and the JSON round-trip.
+class SklearnTrainingMultinomialNaiveBayesOpDescSpec extends AnyFlatSpec with Matchers {
+  // Checks the display strings, group, lone "training" input, and single blocking output port.
+  "SklearnTrainingMultinomialNaiveBayesOpDesc.operatorInfo" should
+    "advertise the model name, Sklearn Training group, and the single training port" in {
+    val info = (new SklearnTrainingMultinomialNaiveBayesOpDesc).operatorInfo
+    info.userFriendlyName shouldBe "Training: Multinomial Naive Bayes"
+    info.operatorDescription shouldBe "Sklearn Training: Multinomial Naive Bayes Operator"
+    info.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_TRAINING_GROUP
+    info.inputPorts.map(_.displayName) shouldBe List("training")
+    info.outputPorts should have length 1
+    info.outputPorts.head.blocking shouldBe true
+  }
+  // A new descriptor has both vectorizer flags false and target/text still null.
+  "SklearnTrainingMultinomialNaiveBayesOpDesc" should "default its config fields" in {
+    val d = new SklearnTrainingMultinomialNaiveBayesOpDesc
+    d.countVectorizer shouldBe false
+    d.tfidfTransformer shouldBe false
+    d.target shouldBe null
+    d.text shouldBe null
+  }
+  // Looks up the result by the first output port's id: STRING model_name, BINARY model.
+  "SklearnTrainingMultinomialNaiveBayesOpDesc.getOutputSchemas" should
+    "emit the model_name/model schema keyed by the declared output port" in {
+    val d = new SklearnTrainingMultinomialNaiveBayesOpDesc
+    val schema = d.getOutputSchemas(Map.empty)(d.operatorInfo.outputPorts.head.id)
+    schema.getAttribute("model_name").getType shouldBe AttributeType.STRING
+    schema.getAttribute("model").getType shouldBe AttributeType.BINARY
+  }
+  // With only target set, expects the estimator import, make_pipeline, and the friendly name.
+  "SklearnTrainingMultinomialNaiveBayesOpDesc.generatePythonCode" should "import the configured sklearn estimator" in {
+    val d = new SklearnTrainingMultinomialNaiveBayesOpDesc
+    d.target = "y"
+    val code = d.generatePythonCode()
+    code should include("from sklearn.naive_bayes import MultinomialNB")
+    code should include("make_pipeline")
+    code should include("Training: Multinomial Naive Bayes")
+  }
+  // Serializes, checks the operatorType tag, then deserializes as LogicalOp and compares fields.
+  "SklearnTrainingMultinomialNaiveBayesOpDesc" should "round-trip its config fields through the polymorphic base" in {
+    val d = new SklearnTrainingMultinomialNaiveBayesOpDesc
+    d.target = "label"
+    d.countVectorizer = true
+    val json = objectMapper.writeValueAsString(d)
+    json should include("\"operatorType\":\"SklearnTrainingMultinomialNaiveBayes\"")
+    val restored = objectMapper.readValue(json, classOf[LogicalOp])
+    restored shouldBe a[SklearnTrainingMultinomialNaiveBayesOpDesc]
+    val r = restored.asInstanceOf[SklearnTrainingMultinomialNaiveBayesOpDesc]
+    r.target shouldBe "label"
+    r.countVectorizer shouldBe true
+  }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingRandomForestOpDescSpec.scala
b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingRandomForestOpDescSpec.scala new file mode 100644 index 00000000000..045bbd2b5a7 --- /dev/null +++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingRandomForestOpDescSpec.scala @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.apache.texera.amber.operator.sklearn.training
+
+import org.apache.texera.amber.core.tuple.AttributeType
+import org.apache.texera.amber.operator.LogicalOp
+import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+// Covers operatorInfo, field defaults, output schema, Python codegen, and the JSON round-trip.
+class SklearnTrainingRandomForestOpDescSpec extends AnyFlatSpec with Matchers {
+  // Checks the display strings, group, lone "training" input, and single blocking output port.
+  "SklearnTrainingRandomForestOpDesc.operatorInfo" should
+    "advertise the model name, Sklearn Training group, and the single training port" in {
+    val info = (new SklearnTrainingRandomForestOpDesc).operatorInfo
+    info.userFriendlyName shouldBe "Training: Random Forest"
+    info.operatorDescription shouldBe "Sklearn Training: Random Forest Operator"
+    info.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_TRAINING_GROUP
+    info.inputPorts.map(_.displayName) shouldBe List("training")
+    info.outputPorts should have length 1
+    info.outputPorts.head.blocking shouldBe true
+  }
+  // A new descriptor has both vectorizer flags false and target/text still null.
+  "SklearnTrainingRandomForestOpDesc" should "default its config fields" in {
+    val d = new SklearnTrainingRandomForestOpDesc
+    d.countVectorizer shouldBe false
+    d.tfidfTransformer shouldBe false
+    d.target shouldBe null
+    d.text shouldBe null
+  }
+  // Looks up the result by the first output port's id: STRING model_name, BINARY model.
+  "SklearnTrainingRandomForestOpDesc.getOutputSchemas" should
+    "emit the model_name/model schema keyed by the declared output port" in {
+    val d = new SklearnTrainingRandomForestOpDesc
+    val schema = d.getOutputSchemas(Map.empty)(d.operatorInfo.outputPorts.head.id)
+    schema.getAttribute("model_name").getType shouldBe AttributeType.STRING
+    schema.getAttribute("model").getType shouldBe AttributeType.BINARY
+  }
+  // With only target set, expects the estimator import, make_pipeline, and the friendly name.
+  "SklearnTrainingRandomForestOpDesc.generatePythonCode" should "import the configured sklearn estimator" in {
+    val d = new SklearnTrainingRandomForestOpDesc
+    d.target = "y"
+    val code = d.generatePythonCode()
+    code should include("from sklearn.ensemble import RandomForestClassifier")
+    code should include("make_pipeline")
+    code should include("Training: Random Forest")
+  }
+  // Serializes, checks the operatorType tag, then deserializes as LogicalOp and compares fields.
+  "SklearnTrainingRandomForestOpDesc" should "round-trip its config fields through the polymorphic base" in {
+    val d = new SklearnTrainingRandomForestOpDesc
+    d.target = "label"
+    d.countVectorizer = true
+    val json = objectMapper.writeValueAsString(d)
+    json should include("\"operatorType\":\"SklearnTrainingRandomForest\"")
+    val restored = objectMapper.readValue(json, classOf[LogicalOp])
+    restored shouldBe a[SklearnTrainingRandomForestOpDesc]
+    val r = restored.asInstanceOf[SklearnTrainingRandomForestOpDesc]
+    r.target shouldBe "label"
+    r.countVectorizer shouldBe true
+  }
+}