Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions python/datafusion/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -914,17 +914,20 @@ def repartition(self, num: int) -> DataFrame:
"""
return DataFrame(self.df.repartition(num))

def repartition_by_hash(self, *exprs: Expr | str, num: int) -> DataFrame:
    """Repartition a DataFrame using a hash partitioning scheme.

    Args:
        exprs: Expressions or SQL expression strings to evaluate and
            perform hashing on. Any ``str`` argument is first passed
            through :py:meth:`parse_sql_expr`; other arguments are used
            as given.
        num: Number of partitions to repartition the DataFrame into.

    Returns:
        Repartitioned DataFrame.
    """
    # Strings must be parsed before conversion: they have no ``.expr``
    # attribute, so unwrapping them directly would raise AttributeError.
    parsed = [self.parse_sql_expr(e) if isinstance(e, str) else e for e in exprs]

    # Convert the now-uniform list into the form the wrapped ``self.df``
    # object is called with.
    raw_exprs = expr_list_to_raw_expr_list(parsed)

    return DataFrame(self.df.repartition_by_hash(*raw_exprs, num=num))

def union(self, other: DataFrame, distinct: bool = False) -> DataFrame:
Expand Down
5 changes: 5 additions & 0 deletions python/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1613,6 +1613,11 @@ def test_repartition(df):
def test_repartition_by_hash(df):
    """Smoke test: hash-repartition on a ``column("a")`` key into 2 partitions.

    Passes as long as the call does not raise; the result is not inspected.
    """
    hash_key = column("a")
    df.repartition_by_hash(hash_key, num=2)

def test_repartition_by_hash_sql_expression(df):
    """Smoke test: hash-repartition with the key given as the string ``"a"``.

    Passes as long as the call does not raise; the result is not inspected.
    """
    sql_key = "a"
    df.repartition_by_hash(sql_key, num=2)

def test_repartition_by_hash_mix(df):
    """Smoke test: hash-repartition with a ``column("a")`` key and a string key.

    Passes as long as the call does not raise; the result is not inspected.
    """
    hash_keys = (column("a"), "b")
    df.repartition_by_hash(*hash_keys, num=2)

def test_intersect():
ctx = SessionContext()
Expand Down
Loading