Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.texera.amber.operator.sklearn.training

import org.apache.texera.amber.core.tuple.AttributeType
import org.apache.texera.amber.operator.LogicalOp
import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
import org.apache.texera.amber.util.JSONUtils.objectMapper
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

/**
  * Specification for [[SklearnTrainingBernoulliNaiveBayesOpDesc]].
  *
  * Covers the operator metadata, the initial values of the config fields, the output
  * schema, the generated Python source, and JSON (de)serialization through [[LogicalOp]].
  */
class SklearnTrainingBernoulliNaiveBayesOpDescSpec extends AnyFlatSpec with Matchers {

  "SklearnTrainingBernoulliNaiveBayesOpDesc.operatorInfo" should
    "advertise the model name, Sklearn Training group, and the single training port" in {
    val opDesc = new SklearnTrainingBernoulliNaiveBayesOpDesc
    val metadata = opDesc.operatorInfo

    // Display name, description and operator group.
    metadata.userFriendlyName shouldBe "Training: Bernoulli Naive Bayes"
    metadata.operatorDescription shouldBe "Sklearn Training: Bernoulli Naive Bayes Operator"
    metadata.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_TRAINING_GROUP

    // The only input port is displayed as "training".
    val inputPortNames = metadata.inputPorts.map(_.displayName)
    inputPortNames shouldBe List("training")

    // There is exactly one output port and it is flagged as blocking.
    val outputs = metadata.outputPorts
    outputs should have length 1
    outputs.head.blocking shouldBe true
  }

  "SklearnTrainingBernoulliNaiveBayesOpDesc" should "default its config fields" in {
    val fresh = new SklearnTrainingBernoulliNaiveBayesOpDesc

    // Both boolean flags are false on a newly constructed descriptor.
    fresh.countVectorizer shouldBe false
    fresh.tfidfTransformer shouldBe false

    // target and text are null until they are assigned.
    fresh.target shouldBe null
    fresh.text shouldBe null
  }

  "SklearnTrainingBernoulliNaiveBayesOpDesc.getOutputSchemas" should
    "emit the model_name/model schema keyed by the declared output port" in {
    val opDesc = new SklearnTrainingBernoulliNaiveBayesOpDesc

    // Look the schema up under the id of the first declared output port.
    val schemasByPort = opDesc.getOutputSchemas(Map.empty)
    val outputPortId = opDesc.operatorInfo.outputPorts.head.id
    val modelSchema = schemasByPort(outputPortId)

    modelSchema.getAttribute("model_name").getType shouldBe AttributeType.STRING
    modelSchema.getAttribute("model").getType shouldBe AttributeType.BINARY
  }

  "SklearnTrainingBernoulliNaiveBayesOpDesc.generatePythonCode" should
    "import the configured sklearn estimator" in {
    val opDesc = new SklearnTrainingBernoulliNaiveBayesOpDesc
    opDesc.target = "y"
    val pythonSource = opDesc.generatePythonCode()

    // Substrings the generated source must contain, checked in this order.
    val expectedFragments = Seq(
      "from sklearn.naive_bayes import BernoulliNB",
      "make_pipeline",
      "Training: Bernoulli Naive Bayes"
    )
    expectedFragments.foreach { fragment =>
      pythonSource should include(fragment)
    }
  }

  "SklearnTrainingBernoulliNaiveBayesOpDesc" should
    "round-trip its config fields through the polymorphic base" in {
    val original = new SklearnTrainingBernoulliNaiveBayesOpDesc
    original.target = "label"
    original.countVectorizer = true

    // The serialized form must carry the operatorType discriminator.
    val serialized = objectMapper.writeValueAsString(original)
    serialized should include("\"operatorType\":\"SklearnTrainingBernoulliNaiveBayes\"")

    // Reading back as LogicalOp must yield the concrete descriptor class.
    val decoded = objectMapper.readValue(serialized, classOf[LogicalOp])
    decoded shouldBe a[SklearnTrainingBernoulliNaiveBayesOpDesc]

    val typed = decoded.asInstanceOf[SklearnTrainingBernoulliNaiveBayesOpDesc]
    typed.target shouldBe "label"
    typed.countVectorizer shouldBe true
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.texera.amber.operator.sklearn.training

import org.apache.texera.amber.core.tuple.AttributeType
import org.apache.texera.amber.operator.LogicalOp
import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
import org.apache.texera.amber.util.JSONUtils.objectMapper
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

/**
  * Specification for [[SklearnTrainingComplementNaiveBayesOpDesc]].
  *
  * Covers the operator metadata, the initial values of the config fields, the output
  * schema, the generated Python source, and JSON (de)serialization through [[LogicalOp]].
  */
class SklearnTrainingComplementNaiveBayesOpDescSpec extends AnyFlatSpec with Matchers {

  "SklearnTrainingComplementNaiveBayesOpDesc.operatorInfo" should
    "advertise the model name, Sklearn Training group, and the single training port" in {
    val opDesc = new SklearnTrainingComplementNaiveBayesOpDesc
    val metadata = opDesc.operatorInfo

    // Display name, description and operator group.
    metadata.userFriendlyName shouldBe "Training: Complement Naive Bayes"
    metadata.operatorDescription shouldBe "Sklearn Training: Complement Naive Bayes Operator"
    metadata.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_TRAINING_GROUP

    // The only input port is displayed as "training".
    val inputPortNames = metadata.inputPorts.map(_.displayName)
    inputPortNames shouldBe List("training")

    // There is exactly one output port and it is flagged as blocking.
    val outputs = metadata.outputPorts
    outputs should have length 1
    outputs.head.blocking shouldBe true
  }

  "SklearnTrainingComplementNaiveBayesOpDesc" should "default its config fields" in {
    val fresh = new SklearnTrainingComplementNaiveBayesOpDesc

    // Both boolean flags are false on a newly constructed descriptor.
    fresh.countVectorizer shouldBe false
    fresh.tfidfTransformer shouldBe false

    // target and text are null until they are assigned.
    fresh.target shouldBe null
    fresh.text shouldBe null
  }

  "SklearnTrainingComplementNaiveBayesOpDesc.getOutputSchemas" should
    "emit the model_name/model schema keyed by the declared output port" in {
    val opDesc = new SklearnTrainingComplementNaiveBayesOpDesc

    // Look the schema up under the id of the first declared output port.
    val schemasByPort = opDesc.getOutputSchemas(Map.empty)
    val outputPortId = opDesc.operatorInfo.outputPorts.head.id
    val modelSchema = schemasByPort(outputPortId)

    modelSchema.getAttribute("model_name").getType shouldBe AttributeType.STRING
    modelSchema.getAttribute("model").getType shouldBe AttributeType.BINARY
  }

  "SklearnTrainingComplementNaiveBayesOpDesc.generatePythonCode" should
    "import the configured sklearn estimator" in {
    val opDesc = new SklearnTrainingComplementNaiveBayesOpDesc
    opDesc.target = "y"
    val pythonSource = opDesc.generatePythonCode()

    // Substrings the generated source must contain, checked in this order.
    val expectedFragments = Seq(
      "from sklearn.naive_bayes import ComplementNB",
      "make_pipeline",
      "Training: Complement Naive Bayes"
    )
    expectedFragments.foreach { fragment =>
      pythonSource should include(fragment)
    }
  }

  "SklearnTrainingComplementNaiveBayesOpDesc" should
    "round-trip its config fields through the polymorphic base" in {
    val original = new SklearnTrainingComplementNaiveBayesOpDesc
    original.target = "label"
    original.countVectorizer = true

    // The serialized form must carry the operatorType discriminator.
    val serialized = objectMapper.writeValueAsString(original)
    serialized should include("\"operatorType\":\"SklearnTrainingComplementNaiveBayes\"")

    // Reading back as LogicalOp must yield the concrete descriptor class.
    val decoded = objectMapper.readValue(serialized, classOf[LogicalOp])
    decoded shouldBe a[SklearnTrainingComplementNaiveBayesOpDesc]

    val typed = decoded.asInstanceOf[SklearnTrainingComplementNaiveBayesOpDesc]
    typed.target shouldBe "label"
    typed.countVectorizer shouldBe true
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.texera.amber.operator.sklearn.training

import org.apache.texera.amber.core.tuple.AttributeType
import org.apache.texera.amber.operator.LogicalOp
import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
import org.apache.texera.amber.util.JSONUtils.objectMapper
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

/**
  * Specification for [[SklearnTrainingDecisionTreeOpDesc]].
  *
  * Covers the operator metadata, the initial values of the config fields, the output
  * schema, the generated Python source, and JSON (de)serialization through [[LogicalOp]].
  */
class SklearnTrainingDecisionTreeOpDescSpec extends AnyFlatSpec with Matchers {

  "SklearnTrainingDecisionTreeOpDesc.operatorInfo" should
    "advertise the model name, Sklearn Training group, and the single training port" in {
    val opDesc = new SklearnTrainingDecisionTreeOpDesc
    val metadata = opDesc.operatorInfo

    // Display name, description and operator group.
    metadata.userFriendlyName shouldBe "Training: Decision Tree"
    metadata.operatorDescription shouldBe "Sklearn Training: Decision Tree Operator"
    metadata.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_TRAINING_GROUP

    // The only input port is displayed as "training".
    val inputPortNames = metadata.inputPorts.map(_.displayName)
    inputPortNames shouldBe List("training")

    // There is exactly one output port and it is flagged as blocking.
    val outputs = metadata.outputPorts
    outputs should have length 1
    outputs.head.blocking shouldBe true
  }

  "SklearnTrainingDecisionTreeOpDesc" should "default its config fields" in {
    val fresh = new SklearnTrainingDecisionTreeOpDesc

    // Both boolean flags are false on a newly constructed descriptor.
    fresh.countVectorizer shouldBe false
    fresh.tfidfTransformer shouldBe false

    // target and text are null until they are assigned.
    fresh.target shouldBe null
    fresh.text shouldBe null
  }

  "SklearnTrainingDecisionTreeOpDesc.getOutputSchemas" should
    "emit the model_name/model schema keyed by the declared output port" in {
    val opDesc = new SklearnTrainingDecisionTreeOpDesc

    // Look the schema up under the id of the first declared output port.
    val schemasByPort = opDesc.getOutputSchemas(Map.empty)
    val outputPortId = opDesc.operatorInfo.outputPorts.head.id
    val modelSchema = schemasByPort(outputPortId)

    modelSchema.getAttribute("model_name").getType shouldBe AttributeType.STRING
    modelSchema.getAttribute("model").getType shouldBe AttributeType.BINARY
  }

  "SklearnTrainingDecisionTreeOpDesc.generatePythonCode" should
    "import the configured sklearn estimator" in {
    val opDesc = new SklearnTrainingDecisionTreeOpDesc
    opDesc.target = "y"
    val pythonSource = opDesc.generatePythonCode()

    // Substrings the generated source must contain, checked in this order.
    val expectedFragments = Seq(
      "from sklearn.tree import DecisionTreeClassifier",
      "make_pipeline",
      "Training: Decision Tree"
    )
    expectedFragments.foreach { fragment =>
      pythonSource should include(fragment)
    }
  }

  "SklearnTrainingDecisionTreeOpDesc" should
    "round-trip its config fields through the polymorphic base" in {
    val original = new SklearnTrainingDecisionTreeOpDesc
    original.target = "label"
    original.countVectorizer = true

    // The serialized form must carry the operatorType discriminator.
    val serialized = objectMapper.writeValueAsString(original)
    serialized should include("\"operatorType\":\"SklearnTrainingDecisionTree\"")

    // Reading back as LogicalOp must yield the concrete descriptor class.
    val decoded = objectMapper.readValue(serialized, classOf[LogicalOp])
    decoded shouldBe a[SklearnTrainingDecisionTreeOpDesc]

    val typed = decoded.asInstanceOf[SklearnTrainingDecisionTreeOpDesc]
    typed.target shouldBe "label"
    typed.countVectorizer shouldBe true
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.texera.amber.operator.sklearn.training

import org.apache.texera.amber.core.tuple.AttributeType
import org.apache.texera.amber.operator.LogicalOp
import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
import org.apache.texera.amber.util.JSONUtils.objectMapper
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

/**
  * Specification for [[SklearnTrainingExtraTreeOpDesc]].
  *
  * Covers the operator metadata, the initial values of the config fields, the output
  * schema, the generated Python source, and JSON (de)serialization through [[LogicalOp]].
  */
class SklearnTrainingExtraTreeOpDescSpec extends AnyFlatSpec with Matchers {

  "SklearnTrainingExtraTreeOpDesc.operatorInfo" should
    "advertise the model name, Sklearn Training group, and the single training port" in {
    val opDesc = new SklearnTrainingExtraTreeOpDesc
    val metadata = opDesc.operatorInfo

    // Display name, description and operator group.
    metadata.userFriendlyName shouldBe "Training: Extra Tree"
    metadata.operatorDescription shouldBe "Sklearn Training: Extra Tree Operator"
    metadata.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_TRAINING_GROUP

    // The only input port is displayed as "training".
    val inputPortNames = metadata.inputPorts.map(_.displayName)
    inputPortNames shouldBe List("training")

    // There is exactly one output port and it is flagged as blocking.
    val outputs = metadata.outputPorts
    outputs should have length 1
    outputs.head.blocking shouldBe true
  }

  "SklearnTrainingExtraTreeOpDesc" should "default its config fields" in {
    val fresh = new SklearnTrainingExtraTreeOpDesc

    // Both boolean flags are false on a newly constructed descriptor.
    fresh.countVectorizer shouldBe false
    fresh.tfidfTransformer shouldBe false

    // target and text are null until they are assigned.
    fresh.target shouldBe null
    fresh.text shouldBe null
  }

  "SklearnTrainingExtraTreeOpDesc.getOutputSchemas" should
    "emit the model_name/model schema keyed by the declared output port" in {
    val opDesc = new SklearnTrainingExtraTreeOpDesc

    // Look the schema up under the id of the first declared output port.
    val schemasByPort = opDesc.getOutputSchemas(Map.empty)
    val outputPortId = opDesc.operatorInfo.outputPorts.head.id
    val modelSchema = schemasByPort(outputPortId)

    modelSchema.getAttribute("model_name").getType shouldBe AttributeType.STRING
    modelSchema.getAttribute("model").getType shouldBe AttributeType.BINARY
  }

  "SklearnTrainingExtraTreeOpDesc.generatePythonCode" should
    "import the configured sklearn estimator" in {
    val opDesc = new SklearnTrainingExtraTreeOpDesc
    opDesc.target = "y"
    val pythonSource = opDesc.generatePythonCode()

    // Substrings the generated source must contain, checked in this order.
    val expectedFragments = Seq(
      "from sklearn.tree import ExtraTreeClassifier",
      "make_pipeline",
      "Training: Extra Tree"
    )
    expectedFragments.foreach { fragment =>
      pythonSource should include(fragment)
    }
  }

  "SklearnTrainingExtraTreeOpDesc" should
    "round-trip its config fields through the polymorphic base" in {
    val original = new SklearnTrainingExtraTreeOpDesc
    original.target = "label"
    original.countVectorizer = true

    // The serialized form must carry the operatorType discriminator.
    val serialized = objectMapper.writeValueAsString(original)
    serialized should include("\"operatorType\":\"SklearnTrainingExtraTree\"")

    // Reading back as LogicalOp must yield the concrete descriptor class.
    val decoded = objectMapper.readValue(serialized, classOf[LogicalOp])
    decoded shouldBe a[SklearnTrainingExtraTreeOpDesc]

    val typed = decoded.asInstanceOf[SklearnTrainingExtraTreeOpDesc]
    typed.target shouldBe "label"
    typed.countVectorizer shouldBe true
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.texera.amber.operator.sklearn.training

import org.apache.texera.amber.core.tuple.AttributeType
import org.apache.texera.amber.operator.LogicalOp
import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
import org.apache.texera.amber.util.JSONUtils.objectMapper
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

/**
  * Specification for [[SklearnTrainingExtraTreesOpDesc]].
  *
  * Covers the operator metadata, the initial values of the config fields, the output
  * schema, the generated Python source, and JSON (de)serialization through [[LogicalOp]].
  */
class SklearnTrainingExtraTreesOpDescSpec extends AnyFlatSpec with Matchers {

  "SklearnTrainingExtraTreesOpDesc.operatorInfo" should
    "advertise the model name, Sklearn Training group, and the single training port" in {
    val opDesc = new SklearnTrainingExtraTreesOpDesc
    val metadata = opDesc.operatorInfo

    // Display name, description and operator group.
    metadata.userFriendlyName shouldBe "Training: Extra Trees"
    metadata.operatorDescription shouldBe "Sklearn Training: Extra Trees Operator"
    metadata.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_TRAINING_GROUP

    // The only input port is displayed as "training".
    val inputPortNames = metadata.inputPorts.map(_.displayName)
    inputPortNames shouldBe List("training")

    // There is exactly one output port and it is flagged as blocking.
    val outputs = metadata.outputPorts
    outputs should have length 1
    outputs.head.blocking shouldBe true
  }

  "SklearnTrainingExtraTreesOpDesc" should "default its config fields" in {
    val fresh = new SklearnTrainingExtraTreesOpDesc

    // Both boolean flags are false on a newly constructed descriptor.
    fresh.countVectorizer shouldBe false
    fresh.tfidfTransformer shouldBe false

    // target and text are null until they are assigned.
    fresh.target shouldBe null
    fresh.text shouldBe null
  }

  "SklearnTrainingExtraTreesOpDesc.getOutputSchemas" should
    "emit the model_name/model schema keyed by the declared output port" in {
    val opDesc = new SklearnTrainingExtraTreesOpDesc

    // Look the schema up under the id of the first declared output port.
    val schemasByPort = opDesc.getOutputSchemas(Map.empty)
    val outputPortId = opDesc.operatorInfo.outputPorts.head.id
    val modelSchema = schemasByPort(outputPortId)

    modelSchema.getAttribute("model_name").getType shouldBe AttributeType.STRING
    modelSchema.getAttribute("model").getType shouldBe AttributeType.BINARY
  }

  "SklearnTrainingExtraTreesOpDesc.generatePythonCode" should
    "import the configured sklearn estimator" in {
    val opDesc = new SklearnTrainingExtraTreesOpDesc
    opDesc.target = "y"
    val pythonSource = opDesc.generatePythonCode()

    // Substrings the generated source must contain, checked in this order.
    val expectedFragments = Seq(
      "from sklearn.ensemble import ExtraTreesClassifier",
      "make_pipeline",
      "Training: Extra Trees"
    )
    expectedFragments.foreach { fragment =>
      pythonSource should include(fragment)
    }
  }

  "SklearnTrainingExtraTreesOpDesc" should
    "round-trip its config fields through the polymorphic base" in {
    val original = new SklearnTrainingExtraTreesOpDesc
    original.target = "label"
    original.countVectorizer = true

    // The serialized form must carry the operatorType discriminator.
    val serialized = objectMapper.writeValueAsString(original)
    serialized should include("\"operatorType\":\"SklearnTrainingExtraTrees\"")

    // Reading back as LogicalOp must yield the concrete descriptor class.
    val decoded = objectMapper.readValue(serialized, classOf[LogicalOp])
    decoded shouldBe a[SklearnTrainingExtraTreesOpDesc]

    val typed = decoded.asInstanceOf[SklearnTrainingExtraTreesOpDesc]
    typed.target shouldBe "label"
    typed.countVectorizer shouldBe true
  }
}
Loading
Loading