@@ -36,7 +36,8 @@ def get_dispatch(dtypes):
3636def group_add_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
3737 ndarray[int64_t] counts,
3838 ndarray[{{c_type}}, ndim=2] values,
39- ndarray[int64_t] labels):
39+ ndarray[int64_t] labels,
40+ Py_ssize_t min_count=1):
4041 """
4142 Only aggregates on axis=0
4243 """
@@ -88,7 +89,7 @@ def group_add_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
8889
8990 for i in range(ncounts):
9091 for j in range(K):
91- if nobs[i, j] == 0 :
92+ if nobs[i, j] < min_count :
9293 out[i, j] = NAN
9394 else:
9495 out[i, j] = sumx[i, j]
@@ -99,7 +100,8 @@ def group_add_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
99100def group_prod_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
100101 ndarray[int64_t] counts,
101102 ndarray[{{c_type}}, ndim=2] values,
102- ndarray[int64_t] labels):
103+ ndarray[int64_t] labels,
104+ Py_ssize_t min_count=1):
103105 """
104106 Only aggregates on axis=0
105107 """
@@ -147,7 +149,7 @@ def group_prod_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
147149
148150 for i in range(ncounts):
149151 for j in range(K):
150- if nobs[i, j] == 0 :
152+ if nobs[i, j] < min_count :
151153 out[i, j] = NAN
152154 else:
153155 out[i, j] = prodx[i, j]
@@ -159,12 +161,15 @@ def group_prod_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
159161def group_var_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
160162 ndarray[int64_t] counts,
161163 ndarray[{{dest_type2}}, ndim=2] values,
162- ndarray[int64_t] labels):
164+ ndarray[int64_t] labels,
165+ Py_ssize_t min_count=-1):
163166 cdef:
164167 Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
165168 {{dest_type2}} val, ct, oldmean
166169 ndarray[{{dest_type2}}, ndim=2] nobs, mean
167170
171+ assert min_count == -1, "'min_count' only used in add and prod"
172+
168173 if not len(values) == len(labels):
169174 raise AssertionError("len(index) != len(labels)")
170175
@@ -208,12 +213,15 @@ def group_var_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
208213def group_mean_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
209214 ndarray[int64_t] counts,
210215 ndarray[{{dest_type2}}, ndim=2] values,
211- ndarray[int64_t] labels):
216+ ndarray[int64_t] labels,
217+ Py_ssize_t min_count=-1):
212218 cdef:
213219 Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
214220 {{dest_type2}} val, count
215221 ndarray[{{dest_type2}}, ndim=2] sumx, nobs
216222
223+ assert min_count == -1, "'min_count' only used in add and prod"
224+
217225 if not len(values) == len(labels):
218226 raise AssertionError("len(index) != len(labels)")
219227
@@ -263,7 +271,8 @@ def group_mean_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
263271def group_ohlc_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
264272 ndarray[int64_t] counts,
265273 ndarray[{{dest_type2}}, ndim=2] values,
266- ndarray[int64_t] labels):
274+ ndarray[int64_t] labels,
275+ Py_ssize_t min_count=-1):
267276 """
268277 Only aggregates on axis=0
269278 """
@@ -272,6 +281,8 @@ def group_ohlc_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
272281 {{dest_type2}} val, count
273282 Py_ssize_t ngroups = len(counts)
274283
284+ assert min_count == -1, "'min_count' only used in add and prod"
285+
275286 if len(labels) == 0:
276287 return
277288
@@ -332,7 +343,8 @@ def get_dispatch(dtypes):
332343def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
333344 ndarray[int64_t] counts,
334345 ndarray[{{c_type}}, ndim=2] values,
335- ndarray[int64_t] labels):
346+ ndarray[int64_t] labels,
347+ Py_ssize_t min_count=-1):
336348 """
337349 Only aggregates on axis=0
338350 """
@@ -342,6 +354,8 @@ def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
342354 ndarray[{{dest_type2}}, ndim=2] resx
343355 ndarray[int64_t, ndim=2] nobs
344356
357+ assert min_count == -1, "'min_count' only used in add and prod"
358+
345359 if not len(values) == len(labels):
346360 raise AssertionError("len(index) != len(labels)")
347361
@@ -382,7 +396,8 @@ def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
382396def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
383397 ndarray[int64_t] counts,
384398 ndarray[{{c_type}}, ndim=2] values,
385- ndarray[int64_t] labels, int64_t rank):
399+ ndarray[int64_t] labels, int64_t rank,
400+ Py_ssize_t min_count=-1):
386401 """
387402 Only aggregates on axis=0
388403 """
@@ -392,6 +407,8 @@ def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
392407 ndarray[{{dest_type2}}, ndim=2] resx
393408 ndarray[int64_t, ndim=2] nobs
394409
410+ assert min_count == -1, "'min_count' only used in add and prod"
411+
395412 if not len(values) == len(labels):
396413 raise AssertionError("len(index) != len(labels)")
397414
@@ -455,7 +472,8 @@ def get_dispatch(dtypes):
455472def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
456473 ndarray[int64_t] counts,
457474 ndarray[{{dest_type2}}, ndim=2] values,
458- ndarray[int64_t] labels):
475+ ndarray[int64_t] labels,
476+ Py_ssize_t min_count=-1):
459477 """
460478 Only aggregates on axis=0
461479 """
@@ -464,6 +482,8 @@ def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
464482 {{dest_type2}} val, count
465483 ndarray[{{dest_type2}}, ndim=2] maxx, nobs
466484
485+ assert min_count == -1, "'min_count' only used in add and prod"
486+
467487 if not len(values) == len(labels):
468488 raise AssertionError("len(index) != len(labels)")
469489
@@ -526,7 +546,8 @@ def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
526546def group_min_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
527547 ndarray[int64_t] counts,
528548 ndarray[{{dest_type2}}, ndim=2] values,
529- ndarray[int64_t] labels):
549+ ndarray[int64_t] labels,
550+ Py_ssize_t min_count=-1):
530551 """
531552 Only aggregates on axis=0
532553 """
@@ -535,6 +556,8 @@ def group_min_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
535556 {{dest_type2}} val, count
536557 ndarray[{{dest_type2}}, ndim=2] minx, nobs
537558
559+ assert min_count == -1, "'min_count' only used in add and prod"
560+
538561 if not len(values) == len(labels):
539562 raise AssertionError("len(index) != len(labels)")
540563
@@ -686,7 +709,8 @@ def group_cummax_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
686709def group_median_float64(ndarray[float64_t, ndim=2] out,
687710 ndarray[int64_t] counts,
688711 ndarray[float64_t, ndim=2] values,
689- ndarray[int64_t] labels):
712+ ndarray[int64_t] labels,
713+ Py_ssize_t min_count=-1):
690714 """
691715 Only aggregates on axis=0
692716 """
@@ -695,6 +719,9 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,
695719 ndarray[int64_t] _counts
696720 ndarray data
697721 float64_t* ptr
722+
723+ assert min_count == -1, "'min_count' only used in add and prod"
724+
698725 ngroups = len(counts)
699726 N, K = (<object> values).shape
700727
0 commit comments