@@ -37,7 +37,6 @@ import org.apache.spark.api.java.function.MapFunction
3737import org.apache.spark.api.java.function.ReduceFunction
3838import org.apache.spark.rdd.RDD
3939import org.apache.spark.sql.*
40- import org.jetbrains.kotlinx.spark.extensions.KSparkExtensions
4140import scala.Tuple2
4241import scala.Tuple3
4342import scala.Tuple4
@@ -49,7 +48,7 @@ import kotlin.reflect.KProperty1
4948 * Utility method to create dataset from list
5049 */
inline fun <reified T> SparkSession.toDS(list: List<T>): Dataset<T> {
    // The encoder is derived from the reified element type T.
    val elementEncoder = kotlinEncoderFor<T>()
    return createDataset(list, elementEncoder)
}
5352
5453/* *
5554 * Utility method to create dataframe from list
@@ -61,26 +60,26 @@ inline fun <reified T> SparkSession.toDF(list: List<T>, vararg colNames: String)
6160 * Utility method to create dataset from *array or vararg arguments
6261 */
inline fun <reified T> SparkSession.dsOf(vararg t: T): Dataset<T> {
    // createDataset is given a List, so the vararg array is copied into one first.
    val elements = t.toList()
    return createDataset(elements, kotlinEncoderFor<T>())
}
6564
/**
 * Builds a dataframe ([Dataset] of [Row]) from the given array or vararg arguments.
 */
inline fun <reified T> SparkSession.dfOf(vararg t: T): Dataset<Row> {
    val typed = createDataset(t.toList(), kotlinEncoderFor<T>())
    return typed.toDF()
}
7170
/**
 * Builds a dataframe from the given array or vararg arguments, using [colNames] as column names.
 * When [colNames] is empty, the no-argument `toDF()` is called instead.
 */
inline fun <reified T> SparkSession.dfOf(colNames: Array<String>, vararg t: T): Dataset<Row> {
    val typed = createDataset(t.toList(), kotlinEncoderFor<T>())
    return if (colNames.isEmpty()) typed.toDF() else typed.toDF(*colNames)
}
7877
/**
 * Creates a [Dataset] from this list using the given [spark] session.
 */
inline fun <reified T> List<T>.toDS(spark: SparkSession): Dataset<T> {
    val elementEncoder = kotlinEncoderFor<T>()
    return spark.createDataset(this, elementEncoder)
}
8483
8584/* *
8685 * Utility method to create dataframe from list
@@ -104,13 +103,13 @@ inline fun <reified T> Array<T>.toDF(spark: SparkSession, vararg colNames: Strin
104103 * Utility method to create dataset from RDD
105104 */
inline fun <reified T> RDD<T>.toDS(spark: SparkSession): Dataset<T> {
    // The encoder is derived from the reified element type T.
    val elementEncoder = kotlinEncoderFor<T>()
    return spark.createDataset(this, elementEncoder)
}
108107
/**
 * Creates a [Dataset] from this JavaRDD using the given [spark] session.
 */
inline fun <reified T> JavaRDDLike<T, *>.toDS(spark: SparkSession): Dataset<T> {
    // createDataset is called with the result of rdd(), not with the Java wrapper itself.
    val underlying = this.rdd()
    return spark.createDataset(underlying, kotlinEncoderFor<T>())
}
114113
115114/* *
116115 * Utility method to create Dataset<Row> (Dataframe) from JavaRDD.
@@ -132,37 +131,37 @@ inline fun <reified T> RDD<T>.toDF(spark: SparkSession, vararg colNames: String)
132131 * Returns a new Dataset that contains the result of applying [func] to each element.
133132 */
inline fun <reified T, reified R> Dataset<T>.map(noinline func: (T) -> R): Dataset<R> {
    // Wrap the Kotlin lambda in a MapFunction and pair it with an encoder for R.
    return map(MapFunction(func), kotlinEncoderFor<R>())
}
136135
/**
 * (Kotlin-specific)
 * Applies [func] to every element of this Dataset and flattens the resulting iterators
 * into a single new Dataset.
 */
inline fun <T, reified R> Dataset<T>.flatMap(noinline func: (T) -> Iterator<R>): Dataset<R> {
    return flatMap(func, kotlinEncoderFor<R>())
}
144143
/**
 * (Kotlin-specific)
 * Flattens a Dataset of iterables into a Dataset of their elements. For example, a Dataset of
 * `listOf(listOf(1, 2, 3), listOf(4, 5, 6))` becomes a Dataset of `listOf(1, 2, 3, 4, 5, 6)`.
 */
inline fun <reified T, I : Iterable<T>> Dataset<I>.flatten(): Dataset<T> {
    return flatMap(FlatMapFunction { iterable -> iterable.iterator() }, kotlinEncoderFor<T>())
}
152151
/**
 * (Kotlin-specific)
 * Groups the elements of this Dataset by the key computed with [func] and returns the
 * resulting [KeyValueGroupedDataset].
 */
inline fun <T, reified R> Dataset<T>.groupByKey(noinline func: (T) -> R): KeyValueGroupedDataset<R, T> {
    return groupByKey(MapFunction(func), kotlinEncoderFor<R>())
}
159158
/**
 * (Kotlin-specific)
 * Applies [func] to each partition of this Dataset and returns a new Dataset with the results.
 */
inline fun <T, reified R> Dataset<T>.mapPartitions(noinline func: (Iterator<T>) -> Iterator<R>): Dataset<R> {
    return mapPartitions(func, kotlinEncoderFor<R>())
}
166165
167166/* *
168167 * (Kotlin-specific)
@@ -193,15 +192,6 @@ inline fun <reified T1, T2> Dataset<Tuple2<T1, T2>>.takeKeys(): Dataset<T1> = ma
193192 */
inline fun <reified T1, T2> Dataset<Pair<T1, T2>>.takeKeys(): Dataset<T1> {
    return map { pair -> pair.first }
}
195194
196- /* *
197- * (Kotlin-specific)
198- * Maps the Dataset to only retain the "keys" or [Arity2._1] values.
199- */
200- @Suppress(" DEPRECATION" )
201- @JvmName(" takeKeysArity2" )
202- @Deprecated(" Use Scala tuples instead." , ReplaceWith (" " ))
203- inline fun <reified T1 , T2 > Dataset <Arity2 <T1 , T2 >>.takeKeys (): Dataset <T1 > = map { it._1 }
204-
205195/* *
206196 * (Kotlin-specific)
207197 * Maps the Dataset to only retain the "values" or [Tuple2._2] values.
@@ -215,22 +205,13 @@ inline fun <T1, reified T2> Dataset<Tuple2<T1, T2>>.takeValues(): Dataset<T2> =
215205 */
inline fun <T1, reified T2> Dataset<Pair<T1, T2>>.takeValues(): Dataset<T2> {
    return map { pair -> pair.second }
}
217207
218- /* *
219- * (Kotlin-specific)
220- * Maps the Dataset to only retain the "values" or [Arity2._2] values.
221- */
222- @Suppress(" DEPRECATION" )
223- @JvmName(" takeValuesArity2" )
224- @Deprecated(" Use Scala tuples instead." , ReplaceWith (" " ))
225- inline fun <T1 , reified T2 > Dataset <Arity2 <T1 , T2 >>.takeValues (): Dataset <T2 > = map { it._2 }
226-
/** DEPRECATED: Use [as] or [to] instead of this function. */
@Deprecated(
    message = "Deprecated, since we already have `as`() and to().",
    replaceWith = ReplaceWith("this.to<R>()"),
    level = DeprecationLevel.ERROR,
)
inline fun <T, reified R> Dataset<T>.downcast(): Dataset<R> {
    return `as`(kotlinEncoderFor<R>())
}
234215
235216/* *
236217 * (Kotlin-specific)
@@ -252,7 +233,7 @@ inline fun <T, reified R> Dataset<T>.downcast(): Dataset<R> = `as`(encoder<R>())
252233 *
253234 * @see to as alias for [as]
254235 */
inline fun <reified R> Dataset<*>.`as`(): Dataset<R> {
    // Delegates to the Dataset member `as`, passing an encoder for the reified target type.
    return `as`(kotlinEncoderFor<R>())
}
256237
257238/* *
258239 * (Kotlin-specific)
@@ -274,7 +255,7 @@ inline fun <reified R> Dataset<*>.`as`(): Dataset<R> = `as`(encoder<R>())
274255 *
275256 * @see as as alias for [to]
276257 */
inline fun <reified R> Dataset<*>.to(): Dataset<R> {
    // Delegates to the Dataset member `as`, passing an encoder for the reified target type.
    return `as`(kotlinEncoderFor<R>())
}
278259
279260/* *
280261 * (Kotlin-specific)
@@ -292,12 +273,16 @@ inline fun <reified T> Dataset<T>.forEachPartition(noinline func: (Iterator<T>)
/**
 * `Dataset.debugCodegen` is hard to call from Kotlin, so this utility does it through
 * `DebugQuery` and then returns the receiver.
 */
fun <T> Dataset<T>.debugCodegen(): Dataset<T> {
    val debugPackage = org.apache.spark.sql.execution.debug.`package$`.`MODULE$`
    debugPackage.DebugQuery(this).debugCodegen()
    return this
}
296279
/**
 * `Dataset.debug` is hard to call from Kotlin, so this utility does it through
 * `DebugQuery` and then returns the receiver.
 */
fun <T> Dataset<T>.debug(): Dataset<T> {
    val debugPackage = org.apache.spark.sql.execution.debug.`package$`.`MODULE$`
    debugPackage.DebugQuery(this).debug()
    return this
}
301286
302287
303288/* *
@@ -370,18 +355,6 @@ fun <T1, T2> Dataset<Tuple2<T1, T2>>.sortByKey(): Dataset<Tuple2<T1, T2>> = sort
@JvmName("sortByTuple2Value")
fun <T1, T2> Dataset<Tuple2<T1, T2>>.sortByValue(): Dataset<Tuple2<T1, T2>> {
    return sort("_2")
}
372357
373- /* * Returns a dataset sorted by the first (`_1`) value of each [Arity2] inside. */
374- @Suppress(" DEPRECATION" )
375- @Deprecated(" Use Scala tuples instead." , ReplaceWith (" " ))
376- @JvmName(" sortByArity2Key" )
377- fun <T1 , T2 > Dataset <Arity2 <T1 , T2 >>.sortByKey (): Dataset <Arity2 <T1 , T2 >> = sort(" _1" )
378-
379- /* * Returns a dataset sorted by the second (`_2`) value of each [Arity2] inside. */
380- @Suppress(" DEPRECATION" )
381- @Deprecated(" Use Scala tuples instead." , ReplaceWith (" " ))
382- @JvmName(" sortByArity2Value" )
383- fun <T1 , T2 > Dataset <Arity2 <T1 , T2 >>.sortByValue (): Dataset <Arity2 <T1 , T2 >> = sort(" _2" )
384-
/** Sorts this dataset by the first (`first`) value of each [Pair] it contains. */
@JvmName("sortByPairKey")
fun <T1, T2> Dataset<Pair<T1, T2>>.sortByKey(): Dataset<Pair<T1, T2>> {
    return sort("first")
}
0 commit comments