diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 0cc7434cf..26bac149c 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -217,6 +217,7 @@ "random", "range", "rank", + "regexp_count", "regexp_like", "regexp_match", "regexp_replace", @@ -779,6 +780,23 @@ def regexp_replace( return Expr(f.regexp_replace(string.expr, pattern.expr, replacement.expr, flags)) +def regexp_count( + string: Expr, pattern: Expr, start: Expr, flags: Expr | None = None +) -> Expr: + """Returns the number of matches in a string. + + Optional start position (the first position is 1) to search for the regular + expression. + """ + if flags is not None: + flags = flags.expr + if start is not None: + start = start.expr + else: + start = Expr.expr + return Expr(f.regexp_count(string.expr, pattern.expr, start, flags)) + + def repeat(string: Expr, n: Expr) -> Expr: """Repeats the ``string`` to ``n`` times.""" return Expr(f.repeat(string.expr, n.expr)) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index ed88a16e3..161e1e3bb 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -740,6 +740,10 @@ def test_array_function_obj_tests(stmt, py_expr): f.regexp_replace(column("a"), literal("(ell|orl)"), literal("-")), pa.array(["H-o", "W-d", "!"]), ), + ( + f.regexp_count(column("a"), literal("(ell|orl)"), literal(1)), + pa.array([1, 1, 0], type=pa.int64()), + ), ], ) def test_string_functions(df, function, expected_result): diff --git a/src/functions.rs b/src/functions.rs index 6a8abb18d..8fac239b4 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -173,6 +173,25 @@ fn regexp_replace( ) .into()) } + +#[pyfunction] +#[pyo3(signature = (string, pattern, start, flags=None))] +/// Returns the number of matches found in the string. +fn regexp_count( + string: PyExpr, + pattern: PyExpr, + start: Option, + flags: Option, +) -> PyResult { + Ok(functions::expr_fn::regexp_count( + string.expr, + pattern.expr, + start.map(|x| x.expr), + flags.map(|x| x.expr), + ) + .into()) +} + /// Creates a new Sort Expr #[pyfunction] fn order_by(expr: PyExpr, asc: bool, nulls_first: bool) -> PyResult { @@ -943,6 +962,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(power))?; m.add_wrapped(wrap_pyfunction!(radians))?; m.add_wrapped(wrap_pyfunction!(random))?; + m.add_wrapped(wrap_pyfunction!(regexp_count))?; m.add_wrapped(wrap_pyfunction!(regexp_like))?; m.add_wrapped(wrap_pyfunction!(regexp_match))?; m.add_wrapped(wrap_pyfunction!(regexp_replace))?;