@@ -337,6 +337,8 @@ class BucketedRandomProjectionLSH(_LSH, _BucketedRandomProjectionLSHParams,
337337 >>> model = brp.fit(df)
338338 >>> model.getBucketLength()
339339 1.0
340+ >>> model.setOutputCol("hashes")
341+ BucketedRandomProjectionLSHModel...
340342 >>> model.transform(df).head()
341343 Row(id=0, features=DenseVector([-1.0, -1.0]), hashes=[DenseVector([-1.0])])
342344 >>> data2 = [(4, Vectors.dense([2.0, 2.0 ]),),
@@ -733,6 +735,8 @@ class CountVectorizer(JavaEstimator, _CountVectorizerParams, JavaMLReadable, Jav
733735 >>> cv.setOutputCol("vectors")
734736 CountVectorizer...
735737 >>> model = cv.fit(df)
738+ >>> model.setInputCol("raw")
739+ CountVectorizerModel...
736740 >>> model.transform(df).show(truncate=False)
737741 +-----+---------------+-------------------------+
738742 |label|raw |vectors |
@@ -1345,6 +1349,8 @@ class IDF(JavaEstimator, _IDFParams, JavaMLReadable, JavaMLWritable):
13451349 >>> idf.setOutputCol("idf")
13461350 IDF...
13471351 >>> model = idf.fit(df)
1352+ >>> model.setOutputCol("idf")
1353+ IDFModel...
13481354 >>> model.getMinDocFreq()
13491355 3
13501356 >>> model.idf
@@ -1519,6 +1525,8 @@ class Imputer(JavaEstimator, _ImputerParams, JavaMLReadable, JavaMLWritable):
15191525 >>> imputer.getRelativeError()
15201526 0.001
15211527 >>> model = imputer.fit(df)
1528+ >>> model.setInputCols(["a", "b"])
1529+ ImputerModel...
15221530 >>> model.getStrategy()
15231531 'mean'
15241532 >>> model.surrogateDF.show()
@@ -1810,7 +1818,7 @@ class MaxAbsScaler(JavaEstimator, _MaxAbsScalerParams, JavaMLReadable, JavaMLWri
18101818 MaxAbsScaler...
18111819 >>> model = maScaler.fit(df)
18121820 >>> model.setOutputCol("scaledOutput")
1813- MaxAbsScaler...
1821+ MaxAbsScalerModel...
18141822 >>> model.transform(df).show()
18151823 +-----+------------+
18161824 | a|scaledOutput|
@@ -1928,6 +1936,8 @@ class MinHashLSH(_LSH, HasInputCol, HasOutputCol, HasSeed, JavaMLReadable, JavaM
19281936 >>> mh.setSeed(12345)
19291937 MinHashLSH...
19301938 >>> model = mh.fit(df)
1939+ >>> model.setInputCol("features")
1940+ MinHashLSHModel...
19311941 >>> model.transform(df).head()
19321942 Row(id=0, features=SparseVector(6, {0: 1.0, 1: 1.0, 2: 1.0}), hashes=[DenseVector([6179668...
19331943 >>> data2 = [(3, Vectors.sparse(6, [1, 3, 5], [1.0, 1.0, 1.0]),),
@@ -2056,7 +2066,7 @@ class MinMaxScaler(JavaEstimator, _MinMaxScalerParams, JavaMLReadable, JavaMLWri
20562066 MinMaxScaler...
20572067 >>> model = mmScaler.fit(df)
20582068 >>> model.setOutputCol("scaledOutput")
2059- MinMaxScaler...
2069+ MinMaxScalerModel...
20602070 >>> model.originalMin
20612071 DenseVector([0.0])
20622072 >>> model.originalMax
@@ -2421,6 +2431,8 @@ class OneHotEncoder(JavaEstimator, _OneHotEncoderParams, JavaMLReadable, JavaMLW
24212431 >>> ohe.setOutputCols(["output"])
24222432 OneHotEncoder...
24232433 >>> model = ohe.fit(df)
2434+ >>> model.setOutputCols(["output"])
2435+ OneHotEncoderModel...
24242436 >>> model.getHandleInvalid()
24252437 'error'
24262438 >>> model.transform(df).head().output
@@ -2935,7 +2947,7 @@ class RobustScaler(JavaEstimator, _RobustScalerParams, JavaMLReadable, JavaMLWri
29352947 RobustScaler...
29362948 >>> model = scaler.fit(df)
29372949 >>> model.setOutputCol("output")
2938- RobustScaler...
2950+ RobustScalerModel...
29392951 >>> model.median
29402952 DenseVector([2.0, -2.0])
29412953 >>> model.range
@@ -3330,7 +3342,7 @@ class StandardScaler(JavaEstimator, _StandardScalerParams, JavaMLReadable, JavaM
33303342 >>> model.getInputCol()
33313343 'a'
33323344 >>> model.setOutputCol("output")
3333- StandardScaler...
3345+ StandardScalerModel...
33343346 >>> model.mean
33353347 DenseVector([1.0])
33363348 >>> model.std
@@ -3490,6 +3502,8 @@ class StringIndexer(JavaEstimator, _StringIndexerParams, JavaMLReadable, JavaMLW
34903502 >>> stringIndexer.setHandleInvalid("error")
34913503 StringIndexer...
34923504 >>> model = stringIndexer.fit(stringIndDf)
3505+ >>> model.setHandleInvalid("error")
3506+ StringIndexerModel...
34933507 >>> td = model.transform(stringIndDf)
34943508 >>> sorted(set([(i[0], i[1]) for i in td.select(td.id, td.indexed).collect()]),
34953509 ... key=lambda x: x[0])
@@ -4166,7 +4180,7 @@ class VectorIndexer(JavaEstimator, _VectorIndexerParams, JavaMLReadable, JavaMLW
41664180 >>> indexer.getHandleInvalid()
41674181 'error'
41684182 >>> model.setOutputCol("output")
4169- VectorIndexer...
4183+ VectorIndexerModel...
41704184 >>> model.transform(df).head().output
41714185 DenseVector([1.0, 0.0])
41724186 >>> model.numFeatures
@@ -4487,6 +4501,8 @@ class Word2Vec(JavaEstimator, _Word2VecParams, JavaMLReadable, JavaMLWritable):
44874501 >>> model = word2Vec.fit(doc)
44884502 >>> model.getMinCount()
44894503 5
4504+ >>> model.setInputCol("sentence")
4505+ Word2VecModel...
44904506 >>> model.getVectors().show()
44914507 +----+--------------------+
44924508 |word| vector|
@@ -4714,7 +4730,7 @@ class PCA(JavaEstimator, _PCAParams, JavaMLReadable, JavaMLWritable):
47144730 >>> model.getK()
47154731 2
47164732 >>> model.setOutputCol("output")
4717- PCA...
4733+ PCAModel...
47184734 >>> model.transform(df).collect()[0].output
47194735 DenseVector([1.648..., -4.013...])
47204736 >>> model.explainedVariance
@@ -5139,6 +5155,8 @@ class ChiSqSelector(JavaEstimator, _ChiSqSelectorParams, JavaMLReadable, JavaMLW
51395155 >>> model = selector.fit(df)
51405156 >>> model.getFeaturesCol()
51415157 'features'
5158+ >>> model.setFeaturesCol("features")
5159+ ChiSqSelectorModel...
51425160 >>> model.transform(df).head().selectedFeatures
51435161 DenseVector([18.0])
51445162 >>> model.selectedFeatures
0 commit comments