Skip to content

Commit ffa2ec5

Browse files
committed
alt non-contiguous approach
1 parent aa23a3e commit ffa2ec5

File tree

1 file changed

+47
-52
lines changed

1 file changed

+47
-52
lines changed

src/_arraykit.c

Lines changed: 47 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -3572,58 +3572,6 @@ resolve_dtype_iter(PyObject *Py_UNUSED(m), PyObject *arg) {
35723572

35733573

35743574

3575-
3576-
// NpyIter *iter = NpyIter_New(
3577-
// array, // array
3578-
// NPY_ITER_READONLY | NPY_ITER_EXTERNAL_LOOP, // iter flags
3579-
// NPY_KEEPORDER, // order
3580-
// NPY_NO_CASTING, // casting
3581-
// NULL // dtype
3582-
// );
3583-
// if (iter == NULL) {
3584-
// free(indices);
3585-
// return NULL;
3586-
// }
3587-
// NpyIter_IterNextFunc *iter_next = NpyIter_GetIterNext(iter, NULL);
3588-
// if (iter_next == NULL) {
3589-
// free(indices);
3590-
// NpyIter_Deallocate(iter);
3591-
// return NULL;
3592-
// }
3593-
// char **data_ptr = NpyIter_GetDataPtrArray(iter);
3594-
// char* data;
3595-
// npy_intp *stride_ptr = NpyIter_GetInnerStrideArray(iter);
3596-
// npy_intp stride;
3597-
// npy_intp *inner_size_ptr = NpyIter_GetInnerLoopSizePtr(iter);
3598-
// npy_intp inner_size;
3599-
// npy_int64 i = 0;
3600-
3601-
3602-
// do {
3603-
// data = *data_ptr;
3604-
// stride = *stride_ptr;
3605-
// inner_size = *inner_size_ptr;
3606-
// while (inner_size--) {
3607-
// if (*(npy_bool*)data) {
3608-
// if (AK_UNLIKELY(count == capacity)) {
3609-
// capacity <<= 1;
3610-
// indices = (npy_int64*)realloc(indices, sizeof(npy_int64) * capacity);
3611-
// if (indices == NULL) {
3612-
// NpyIter_Deallocate(iter);
3613-
// return NULL;
3614-
// }
3615-
// }
3616-
// indices[count++] = i;
3617-
// }
3618-
// i++;
3619-
// data += stride;
3620-
// }
3621-
// } while(iter_next(iter));
3622-
// NpyIter_Deallocate(iter);
3623-
3624-
3625-
3626-
36273575
// Given a Boolean, contiguous 1D array, return the index positions in an int64 array. Through experimentation it has been verified that doing full-size allocation of memory does not permit outperforming NumPy at 10_000_000 scale; but doing fewer optimizations does help. Using bit masks does not improve performance over pointer arithmetic. Prescanning for all empty is very effective. Note that NumPy benefits from first counting the nonzeros, then allocating only enough data for the expected number.
36283576
static inline PyObject*
36293577
AK_nonzero_1d(PyArrayObject* array) {
@@ -3681,6 +3629,53 @@ AK_nonzero_1d(PyArrayObject* array) {
36813629
p++;
36823630
}
36833631
}
3632+
// else {
3633+
// NpyIter *iter = NpyIter_New(
3634+
// array, // array
3635+
// NPY_ITER_READONLY | NPY_ITER_EXTERNAL_LOOP, // iter flags
3636+
// NPY_KEEPORDER, // order
3637+
// NPY_NO_CASTING, // casting
3638+
// NULL // dtype
3639+
// );
3640+
// if (iter == NULL) {
3641+
// free(indices);
3642+
// return NULL;
3643+
// }
3644+
// NpyIter_IterNextFunc *iter_next = NpyIter_GetIterNext(iter, NULL);
3645+
// if (iter_next == NULL) {
3646+
// free(indices);
3647+
// NpyIter_Deallocate(iter);
3648+
// return NULL;
3649+
// }
3650+
// char **data_ptr = NpyIter_GetDataPtrArray(iter);
3651+
// char* data;
3652+
// npy_intp *stride_ptr = NpyIter_GetInnerStrideArray(iter);
3653+
// npy_intp stride;
3654+
// npy_intp *inner_size_ptr = NpyIter_GetInnerLoopSizePtr(iter);
3655+
// npy_intp inner_size;
3656+
// npy_int64 i = 0;
3657+
// do {
3658+
// data = *data_ptr;
3659+
// stride = *stride_ptr;
3660+
// inner_size = *inner_size_ptr;
3661+
// while (inner_size--) {
3662+
// if (*(npy_bool*)data) {
3663+
// if (AK_UNLIKELY(count == capacity)) {
3664+
// capacity <<= 1;
3665+
// indices = (npy_int64*)realloc(indices, sizeof(npy_int64) * capacity);
3666+
// if (indices == NULL) {
3667+
// NpyIter_Deallocate(iter);
3668+
// return NULL;
3669+
// }
3670+
// }
3671+
// indices[count++] = i;
3672+
// }
3673+
// i++;
3674+
// data += stride;
3675+
// }
3676+
// } while(iter_next(iter));
3677+
// NpyIter_Deallocate(iter);
3678+
// }
36843679
else {
36853680
npy_intp i = 0; // position within Boolean array
36863681
npy_intp i_end = count_max;

0 commit comments

Comments
 (0)