Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.texera.amber.operator.sklearn.training

import org.apache.texera.amber.core.tuple.AttributeType
import org.apache.texera.amber.operator.LogicalOp
import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
import org.apache.texera.amber.util.JSONUtils.objectMapper
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

/**
  * Unit tests for [[SklearnTrainingAdaptiveBoostingOpDesc]].
  *
  * Checks the operator metadata, the initial values of the config fields, the output schema,
  * the generated Python code, and JSON serialization read back through [[LogicalOp]].
  */
class SklearnTrainingAdaptiveBoostingOpDescSpec extends AnyFlatSpec with Matchers {

  "SklearnTrainingAdaptiveBoostingOpDesc.operatorInfo" should
    "advertise the model name, Sklearn Training group, and the single training port" in {
    val operatorInfo = (new SklearnTrainingAdaptiveBoostingOpDesc).operatorInfo

    // Naming and grouping metadata.
    operatorInfo.userFriendlyName shouldBe "Training: Adaptive Boosting"
    operatorInfo.operatorDescription shouldBe "Sklearn Training: Adaptive Boosting Operator"
    operatorInfo.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_TRAINING_GROUP

    // Port layout: one input named "training" and one blocking output.
    val inputPortNames = operatorInfo.inputPorts.map(_.displayName)
    inputPortNames shouldBe List("training")
    operatorInfo.outputPorts should have length 1
    operatorInfo.outputPorts.head.blocking shouldBe true
  }

  "SklearnTrainingAdaptiveBoostingOpDesc" should "default its config fields" in {
    val freshDesc = new SklearnTrainingAdaptiveBoostingOpDesc

    // Both text-preprocessing switches start off.
    freshDesc.countVectorizer shouldBe false
    freshDesc.tfidfTransformer shouldBe false

    // Column selections are unset until configured.
    freshDesc.target shouldBe null
    freshDesc.text shouldBe null
  }

  "SklearnTrainingAdaptiveBoostingOpDesc.getOutputSchemas" should
    "emit the model_name/model schema keyed by the declared output port" in {
    val desc = new SklearnTrainingAdaptiveBoostingOpDesc

    // Look the schema up by the id of the first declared output port.
    val schemasByPort = desc.getOutputSchemas(Map.empty)
    val outputPortId = desc.operatorInfo.outputPorts.head.id
    val outputSchema = schemasByPort(outputPortId)

    outputSchema.getAttribute("model_name").getType shouldBe AttributeType.STRING
    outputSchema.getAttribute("model").getType shouldBe AttributeType.BINARY
  }

  "SklearnTrainingAdaptiveBoostingOpDesc.generatePythonCode" should
    "import the configured sklearn estimator" in {
    val desc = new SklearnTrainingAdaptiveBoostingOpDesc
    desc.target = "y"

    val pythonCode = desc.generatePythonCode()

    pythonCode should include("from sklearn.ensemble import AdaBoostClassifier")
    pythonCode should include("make_pipeline")
    pythonCode should include("Training: Adaptive Boosting")
  }

  "SklearnTrainingAdaptiveBoostingOpDesc" should
    "round-trip its config fields through the polymorphic base" in {
    val original = new SklearnTrainingAdaptiveBoostingOpDesc
    original.target = "label"
    original.countVectorizer = true

    // The serialized form must carry the operator type discriminator.
    val serialized = objectMapper.writeValueAsString(original)
    serialized should include("\"operatorType\":\"SklearnTrainingAdaptiveBoosting\"")

    // Reading back as the LogicalOp base type must yield the concrete descriptor.
    val deserialized = objectMapper.readValue(serialized, classOf[LogicalOp])
    deserialized shouldBe a[SklearnTrainingAdaptiveBoostingOpDesc]

    val roundTripped = deserialized.asInstanceOf[SklearnTrainingAdaptiveBoostingOpDesc]
    roundTripped.target shouldBe "label"
    roundTripped.countVectorizer shouldBe true
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.texera.amber.operator.sklearn.training

import org.apache.texera.amber.core.tuple.AttributeType
import org.apache.texera.amber.operator.LogicalOp
import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
import org.apache.texera.amber.util.JSONUtils.objectMapper
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

/**
  * Unit tests for [[SklearnTrainingBaggingOpDesc]].
  *
  * Checks the operator metadata, the initial values of the config fields, the output schema,
  * the generated Python code, and JSON serialization read back through [[LogicalOp]].
  */
class SklearnTrainingBaggingOpDescSpec extends AnyFlatSpec with Matchers {

  "SklearnTrainingBaggingOpDesc.operatorInfo" should
    "advertise the model name, Sklearn Training group, and the single training port" in {
    val operatorInfo = (new SklearnTrainingBaggingOpDesc).operatorInfo

    // Naming and grouping metadata.
    operatorInfo.userFriendlyName shouldBe "Training: Bagging"
    operatorInfo.operatorDescription shouldBe "Sklearn Training: Bagging Operator"
    operatorInfo.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_TRAINING_GROUP

    // Port layout: one input named "training" and one blocking output.
    val inputPortNames = operatorInfo.inputPorts.map(_.displayName)
    inputPortNames shouldBe List("training")
    operatorInfo.outputPorts should have length 1
    operatorInfo.outputPorts.head.blocking shouldBe true
  }

  "SklearnTrainingBaggingOpDesc" should "default its config fields" in {
    val freshDesc = new SklearnTrainingBaggingOpDesc

    // Both text-preprocessing switches start off.
    freshDesc.countVectorizer shouldBe false
    freshDesc.tfidfTransformer shouldBe false

    // Column selections are unset until configured.
    freshDesc.target shouldBe null
    freshDesc.text shouldBe null
  }

  "SklearnTrainingBaggingOpDesc.getOutputSchemas" should
    "emit the model_name/model schema keyed by the declared output port" in {
    val desc = new SklearnTrainingBaggingOpDesc

    // Look the schema up by the id of the first declared output port.
    val schemasByPort = desc.getOutputSchemas(Map.empty)
    val outputPortId = desc.operatorInfo.outputPorts.head.id
    val outputSchema = schemasByPort(outputPortId)

    outputSchema.getAttribute("model_name").getType shouldBe AttributeType.STRING
    outputSchema.getAttribute("model").getType shouldBe AttributeType.BINARY
  }

  "SklearnTrainingBaggingOpDesc.generatePythonCode" should
    "import the configured sklearn estimator" in {
    val desc = new SklearnTrainingBaggingOpDesc
    desc.target = "y"

    val pythonCode = desc.generatePythonCode()

    pythonCode should include("from sklearn.ensemble import BaggingClassifier")
    pythonCode should include("make_pipeline")
    pythonCode should include("Training: Bagging")
  }

  "SklearnTrainingBaggingOpDesc" should
    "round-trip its config fields through the polymorphic base" in {
    val original = new SklearnTrainingBaggingOpDesc
    original.target = "label"
    original.countVectorizer = true

    // The serialized form must carry the operator type discriminator.
    val serialized = objectMapper.writeValueAsString(original)
    serialized should include("\"operatorType\":\"SklearnTrainingBagging\"")

    // Reading back as the LogicalOp base type must yield the concrete descriptor.
    val deserialized = objectMapper.readValue(serialized, classOf[LogicalOp])
    deserialized shouldBe a[SklearnTrainingBaggingOpDesc]

    val roundTripped = deserialized.asInstanceOf[SklearnTrainingBaggingOpDesc]
    roundTripped.target shouldBe "label"
    roundTripped.countVectorizer shouldBe true
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.texera.amber.operator.sklearn.training

import org.apache.texera.amber.core.tuple.AttributeType
import org.apache.texera.amber.operator.LogicalOp
import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
import org.apache.texera.amber.util.JSONUtils.objectMapper
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

/**
  * Unit tests for [[SklearnTrainingGradientBoostingOpDesc]].
  *
  * Checks the operator metadata, the initial values of the config fields, the output schema,
  * the generated Python code, and JSON serialization read back through [[LogicalOp]].
  */
class SklearnTrainingGradientBoostingOpDescSpec extends AnyFlatSpec with Matchers {

  "SklearnTrainingGradientBoostingOpDesc.operatorInfo" should
    "advertise the model name, Sklearn Training group, and the single training port" in {
    val operatorInfo = (new SklearnTrainingGradientBoostingOpDesc).operatorInfo

    // Naming and grouping metadata.
    operatorInfo.userFriendlyName shouldBe "Training: Gradient Boosting"
    operatorInfo.operatorDescription shouldBe "Sklearn Training: Gradient Boosting Operator"
    operatorInfo.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_TRAINING_GROUP

    // Port layout: one input named "training" and one blocking output.
    val inputPortNames = operatorInfo.inputPorts.map(_.displayName)
    inputPortNames shouldBe List("training")
    operatorInfo.outputPorts should have length 1
    operatorInfo.outputPorts.head.blocking shouldBe true
  }

  "SklearnTrainingGradientBoostingOpDesc" should "default its config fields" in {
    val freshDesc = new SklearnTrainingGradientBoostingOpDesc

    // Both text-preprocessing switches start off.
    freshDesc.countVectorizer shouldBe false
    freshDesc.tfidfTransformer shouldBe false

    // Column selections are unset until configured.
    freshDesc.target shouldBe null
    freshDesc.text shouldBe null
  }

  "SklearnTrainingGradientBoostingOpDesc.getOutputSchemas" should
    "emit the model_name/model schema keyed by the declared output port" in {
    val desc = new SklearnTrainingGradientBoostingOpDesc

    // Look the schema up by the id of the first declared output port.
    val schemasByPort = desc.getOutputSchemas(Map.empty)
    val outputPortId = desc.operatorInfo.outputPorts.head.id
    val outputSchema = schemasByPort(outputPortId)

    outputSchema.getAttribute("model_name").getType shouldBe AttributeType.STRING
    outputSchema.getAttribute("model").getType shouldBe AttributeType.BINARY
  }

  "SklearnTrainingGradientBoostingOpDesc.generatePythonCode" should
    "import the configured sklearn estimator" in {
    val desc = new SklearnTrainingGradientBoostingOpDesc
    desc.target = "y"

    val pythonCode = desc.generatePythonCode()

    pythonCode should include("from sklearn.ensemble import GradientBoostingClassifier")
    pythonCode should include("make_pipeline")
    pythonCode should include("Training: Gradient Boosting")
  }

  "SklearnTrainingGradientBoostingOpDesc" should
    "round-trip its config fields through the polymorphic base" in {
    val original = new SklearnTrainingGradientBoostingOpDesc
    original.target = "label"
    original.countVectorizer = true

    // The serialized form must carry the operator type discriminator.
    val serialized = objectMapper.writeValueAsString(original)
    serialized should include("\"operatorType\":\"SklearnTrainingGradientBoosting\"")

    // Reading back as the LogicalOp base type must yield the concrete descriptor.
    val deserialized = objectMapper.readValue(serialized, classOf[LogicalOp])
    deserialized shouldBe a[SklearnTrainingGradientBoostingOpDesc]

    val roundTripped = deserialized.asInstanceOf[SklearnTrainingGradientBoostingOpDesc]
    roundTripped.target shouldBe "label"
    roundTripped.countVectorizer shouldBe true
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.texera.amber.operator.sklearn.training

import org.apache.texera.amber.core.tuple.AttributeType
import org.apache.texera.amber.operator.LogicalOp
import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
import org.apache.texera.amber.util.JSONUtils.objectMapper
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

/**
  * Unit tests for [[SklearnTrainingLinearRegressionOpDesc]].
  *
  * Checks the operator metadata, the initial values of the config fields, the output schema,
  * the generated Python code, and JSON serialization read back through [[LogicalOp]].
  */
class SklearnTrainingLinearRegressionOpDescSpec extends AnyFlatSpec with Matchers {

  "SklearnTrainingLinearRegressionOpDesc.operatorInfo" should
    "advertise the model name, Sklearn Training group, and the single training port" in {
    val operatorInfo = (new SklearnTrainingLinearRegressionOpDesc).operatorInfo

    // Naming and grouping metadata.
    operatorInfo.userFriendlyName shouldBe "Training: Linear Regression"
    operatorInfo.operatorDescription shouldBe "Sklearn Training: Linear Regression Operator"
    operatorInfo.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_TRAINING_GROUP

    // Port layout: one input named "training" and one blocking output.
    val inputPortNames = operatorInfo.inputPorts.map(_.displayName)
    inputPortNames shouldBe List("training")
    operatorInfo.outputPorts should have length 1
    operatorInfo.outputPorts.head.blocking shouldBe true
  }

  "SklearnTrainingLinearRegressionOpDesc" should "default its config fields" in {
    val freshDesc = new SklearnTrainingLinearRegressionOpDesc

    // Both text-preprocessing switches start off.
    freshDesc.countVectorizer shouldBe false
    freshDesc.tfidfTransformer shouldBe false

    // Column selections are unset until configured.
    freshDesc.target shouldBe null
    freshDesc.text shouldBe null
  }

  "SklearnTrainingLinearRegressionOpDesc.getOutputSchemas" should
    "emit the model_name/model schema keyed by the declared output port" in {
    val desc = new SklearnTrainingLinearRegressionOpDesc

    // Look the schema up by the id of the first declared output port.
    val schemasByPort = desc.getOutputSchemas(Map.empty)
    val outputPortId = desc.operatorInfo.outputPorts.head.id
    val outputSchema = schemasByPort(outputPortId)

    outputSchema.getAttribute("model_name").getType shouldBe AttributeType.STRING
    outputSchema.getAttribute("model").getType shouldBe AttributeType.BINARY
  }

  "SklearnTrainingLinearRegressionOpDesc.generatePythonCode" should
    "import the configured sklearn estimator" in {
    val desc = new SklearnTrainingLinearRegressionOpDesc
    desc.target = "y"

    val pythonCode = desc.generatePythonCode()

    pythonCode should include("from sklearn.linear_model import LinearRegression")
    pythonCode should include("make_pipeline")
    pythonCode should include("Training: Linear Regression")
  }

  "SklearnTrainingLinearRegressionOpDesc" should
    "round-trip its config fields through the polymorphic base" in {
    val original = new SklearnTrainingLinearRegressionOpDesc
    original.target = "label"
    original.countVectorizer = true

    // The serialized form must carry the operator type discriminator.
    val serialized = objectMapper.writeValueAsString(original)
    serialized should include("\"operatorType\":\"SklearnTrainingLinearRegression\"")

    // Reading back as the LogicalOp base type must yield the concrete descriptor.
    val deserialized = objectMapper.readValue(serialized, classOf[LogicalOp])
    deserialized shouldBe a[SklearnTrainingLinearRegressionOpDesc]

    val roundTripped = deserialized.asInstanceOf[SklearnTrainingLinearRegressionOpDesc]
    roundTripped.target shouldBe "label"
    roundTripped.countVectorizer shouldBe true
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.texera.amber.operator.sklearn.training

import org.apache.texera.amber.core.tuple.AttributeType
import org.apache.texera.amber.operator.LogicalOp
import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
import org.apache.texera.amber.util.JSONUtils.objectMapper
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

/**
  * Unit tests for [[SklearnTrainingLogisticRegressionCVOpDesc]].
  *
  * Checks the operator metadata, the initial values of the config fields, the output schema,
  * the generated Python code, and JSON serialization read back through [[LogicalOp]].
  */
class SklearnTrainingLogisticRegressionCVOpDescSpec extends AnyFlatSpec with Matchers {

  "SklearnTrainingLogisticRegressionCVOpDesc.operatorInfo" should
    "advertise the model name, Sklearn Training group, and the single training port" in {
    val operatorInfo = (new SklearnTrainingLogisticRegressionCVOpDesc).operatorInfo

    // Naming and grouping metadata.
    operatorInfo.userFriendlyName shouldBe "Training: Logistic Regression Cross Validation"
    operatorInfo.operatorDescription shouldBe
      "Sklearn Training: Logistic Regression Cross Validation Operator"
    operatorInfo.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_TRAINING_GROUP

    // Port layout: one input named "training" and one blocking output.
    val inputPortNames = operatorInfo.inputPorts.map(_.displayName)
    inputPortNames shouldBe List("training")
    operatorInfo.outputPorts should have length 1
    operatorInfo.outputPorts.head.blocking shouldBe true
  }

  "SklearnTrainingLogisticRegressionCVOpDesc" should "default its config fields" in {
    val freshDesc = new SklearnTrainingLogisticRegressionCVOpDesc

    // Both text-preprocessing switches start off.
    freshDesc.countVectorizer shouldBe false
    freshDesc.tfidfTransformer shouldBe false

    // Column selections are unset until configured.
    freshDesc.target shouldBe null
    freshDesc.text shouldBe null
  }

  "SklearnTrainingLogisticRegressionCVOpDesc.getOutputSchemas" should
    "emit the model_name/model schema keyed by the declared output port" in {
    val desc = new SklearnTrainingLogisticRegressionCVOpDesc

    // Look the schema up by the id of the first declared output port.
    val schemasByPort = desc.getOutputSchemas(Map.empty)
    val outputPortId = desc.operatorInfo.outputPorts.head.id
    val outputSchema = schemasByPort(outputPortId)

    outputSchema.getAttribute("model_name").getType shouldBe AttributeType.STRING
    outputSchema.getAttribute("model").getType shouldBe AttributeType.BINARY
  }

  "SklearnTrainingLogisticRegressionCVOpDesc.generatePythonCode" should
    "import the configured sklearn estimator" in {
    val desc = new SklearnTrainingLogisticRegressionCVOpDesc
    desc.target = "y"

    val pythonCode = desc.generatePythonCode()

    pythonCode should include("from sklearn.linear_model import LogisticRegressionCV")
    pythonCode should include("make_pipeline")
    pythonCode should include("Training: Logistic Regression Cross Validation")
  }

  "SklearnTrainingLogisticRegressionCVOpDesc" should
    "round-trip its config fields through the polymorphic base" in {
    val original = new SklearnTrainingLogisticRegressionCVOpDesc
    original.target = "label"
    original.countVectorizer = true

    // The serialized form must carry the operator type discriminator.
    val serialized = objectMapper.writeValueAsString(original)
    serialized should include("\"operatorType\":\"SklearnTrainingLogisticRegressionCV\"")

    // Reading back as the LogicalOp base type must yield the concrete descriptor.
    val deserialized = objectMapper.readValue(serialized, classOf[LogicalOp])
    deserialized shouldBe a[SklearnTrainingLogisticRegressionCVOpDesc]

    val roundTripped = deserialized.asInstanceOf[SklearnTrainingLogisticRegressionCVOpDesc]
    roundTripped.target shouldBe "label"
    roundTripped.countVectorizer shouldBe true
  }
}
Loading
Loading