diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index 67812846cf057..7868666e36e88 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -19651,6 +19651,8 @@ def array_remove(col: "ColumnOrName", element: Any) -> Column: def array_distinct(col: "ColumnOrName") -> Column: """ Array function: removes duplicate values from the array. + The order of elements in the result is the same as the order of their first occurrence + in the input. .. versionadded:: 2.4.0 @@ -19830,7 +19832,7 @@ def array_insert(arr: "ColumnOrName", pos: Union["ColumnOrName", int], value: An def array_intersect(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: """ Array function: returns a new array containing the intersection of elements in col1 and col2, - without duplicates. + without duplicates. The result preserves the order of elements from the first array. .. versionadded:: 2.4.0 @@ -19923,7 +19925,8 @@ def array_intersect(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: def array_union(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: """ Array function: returns a new array containing the union of elements in col1 and col2, - without duplicates. + without duplicates. The result preserves the order of elements from the first array, + followed by elements from the second array that are not in the first. .. versionadded:: 2.4.0 @@ -20016,7 +20019,7 @@ def array_union(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: def array_except(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: """ Array function: returns a new array containing the elements present in col1 but not in col2, - without duplicates. + without duplicates. The result preserves the order of elements from the first array. .. 
versionadded:: 2.4.0 diff --git a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala index b3bd22e6323b5..222b24508529d 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala @@ -8845,7 +8845,8 @@ object functions { Column.fn("array_prepend", column, lit(element)) /** - * Removes duplicate values from the array. + * Removes duplicate values from the array. The order of elements in the result is the same as + * the order of their first occurrence in the input. * @group array_funcs * @since 2.4.0 */ @@ -8853,7 +8854,7 @@ object functions { /** * Returns an array of the elements in the intersection of the given two arrays, without - * duplicates. + * duplicates. The result preserves the order of elements from the first array. * * @group array_funcs * @since 2.4.0 @@ -8872,6 +8873,8 @@ object functions { /** * Returns an array of the elements in the union of the given two arrays, without duplicates. + * The result preserves the order of elements from the first array, followed by elements from + * the second array that are not in the first. * * @group array_funcs * @since 2.4.0 @@ -8881,7 +8884,7 @@ object functions { /** * Returns an array of the elements in the first array but not in the second array, without - * duplicates. The order of elements in the result is not determined + * duplicates. The result preserves the order of elements from the first array. 
* * @group array_funcs * @since 2.4.0 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index 60966f3098ca8..990f81c7ee587 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -4200,9 +4200,13 @@ trait ArraySetLike { /** * Removes duplicate values from the array. + * The order of elements in the result is the same as the order of their first occurrence + * in the input. */ @ExpressionDescription( - usage = "_FUNC_(array) - Removes duplicate values from the array.", + usage = """_FUNC_(array) - Removes duplicate values from the array. + The order of elements in the result is the same as the order of their first occurrence + in the input.""", examples = """ Examples: > SELECT _FUNC_(array(1, 2, 3, null, 3)); @@ -4391,12 +4395,16 @@ trait ArrayBinaryLike } /** - * Returns an array of the elements in the union of x and y, without duplicates + * Returns an array of the elements in the union of x and y, without duplicates. + * The result preserves the order of elements from the first array, followed by elements + * from the second array that are not in the first. */ @ExpressionDescription( usage = """ _FUNC_(array1, array2) - Returns an array of the elements in the union of array1 and array2, without duplicates. + The result preserves the order of elements from the first array, followed by elements + from the second array that are not in the first. """, examples = """ Examples: @@ -4568,12 +4576,14 @@ case class ArrayUnion(left: Expression, right: Expression) extends ArrayBinaryLi } /** - * Returns an array of the elements in the intersect of x and y, without duplicates + * Returns an array of the elements in the intersection of x and y, without duplicates. 
+ * The result preserves the order of elements from the first array. */ @ExpressionDescription( usage = """ _FUNC_(array1, array2) - Returns an array of the elements in the intersection of array1 and array2, without duplicates. + The result preserves the order of elements from the first array. """, examples = """ Examples: @@ -4800,12 +4810,14 @@ case class ArrayIntersect(left: Expression, right: Expression) extends ArrayBina } /** - * Returns an array of the elements in the intersect of x and y, without duplicates + * Returns an array of the elements in array1 but not in array2, without duplicates. + * The result preserves the order of elements from the first array. */ @ExpressionDescription( usage = """ _FUNC_(array1, array2) - Returns an array of the elements in array1 but not in array2, without duplicates. + The result preserves the order of elements from the first array. """, examples = """ Examples: