
Commit 5456c93

committed
looooop
1 parent 41fc5ad commit 5456c93

File tree

1 file changed (+139, -2 lines changed)


quaddtype/numpy_quaddtype/src/umath/comparison_ops.cpp

Lines changed: 139 additions & 2 deletions
@@ -23,7 +23,6 @@
 #include "binary_ops.h"
 #include "comparison_ops.h"
 
-
 static NPY_CASTING
 quad_comparison_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[],
                                        PyArray_Descr *const given_descrs[],
@@ -145,6 +144,119 @@ quad_generic_comp_strided_loop_aligned(PyArrayMethod_Context *context, char *con
     }
     return 0;
 }
+// todo: It'll be better to generate separate templates for aligned and unaligned loops
+// Resolve desc and strided loops for logical reduction (Bool, Quad) => Bool
+static NPY_CASTING
+quad_comparison_reduce_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[],
+                                           PyArray_Descr *const given_descrs[],
+                                           PyArray_Descr *loop_descrs[],
+                                           npy_intp *NPY_UNUSED(view_offset))
+{
+    QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0];
+    QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1];
+    NPY_CASTING casting = NPY_SAFE_CASTING;
+
+    for (int i = 0; i < 2; i++) {
+        Py_INCREF(given_descrs[i]);
+        loop_descrs[i] = given_descrs[i];
+    }
+
+    // Set up output descriptor
+    loop_descrs[2] = PyArray_DescrFromType(NPY_BOOL);
+    if (!loop_descrs[2]) {
+        return (NPY_CASTING)-1;
+    }
+    return casting;
+}
+
+template <cmp_quad_def sleef_comp, cmp_londouble_def ld_comp>
+int
+quad_reduce_comp_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[],
+                                      npy_intp const dimensions[], npy_intp const strides[],
+                                      NpyAuxData *auxdata)
+{
+    npy_intp N = dimensions[0];
+    char *in1_ptr = data[0];  // bool
+    char *in2_ptr = data[1];  // quad
+    char *out_ptr = data[2];  // bool
+    npy_intp in1_stride = strides[0];
+    npy_intp in2_stride = strides[1];
+    npy_intp out_stride = strides[2];
+
+    QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[1];
+    QuadBackendType backend = descr->backend;
+    while (N--) {
+        npy_bool in1 = *(npy_bool *)in1_ptr;
+        quad_value in1_quad;
+        quad_value in2;
+
+        npy_bool result;
+
+        if (backend == BACKEND_SLEEF) {
+            in1_quad.sleef_value = Sleef_cast_from_int64q1(in1);
+            in2.sleef_value = *(Sleef_quad *)in2_ptr;
+            result = sleef_comp(&in1_quad.sleef_value, &in2.sleef_value);
+        }
+        else {
+            in1_quad.longdouble_value = static_cast<long double>(in1);
+            in2.longdouble_value = *(long double *)in2_ptr;
+            result = ld_comp(&in1_quad.longdouble_value, &in2.longdouble_value);
+        }
+
+        *(npy_bool *)out_ptr = result;
+
+        in1_ptr += in1_stride;
+        in2_ptr += in2_stride;
+        out_ptr += out_stride;
+    }
+    return 0;
+}
+
+template <cmp_quad_def sleef_comp, cmp_londouble_def ld_comp>
+int
+quad_reduce_comp_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[],
+                                        npy_intp const dimensions[], npy_intp const strides[],
+                                        NpyAuxData *auxdata)
+{
+    npy_intp N = dimensions[0];
+    char *in1_ptr = data[0];  // bool
+    char *in2_ptr = data[1];  // quad
+    char *out_ptr = data[2];  // bool
+    npy_intp in1_stride = strides[0];
+    npy_intp in2_stride = strides[1];
+    npy_intp out_stride = strides[2];
+
+    QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[1];
+    QuadBackendType backend = descr->backend;
+    size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
+
+    npy_bool in1;
+    quad_value in1_quad, in2;
+    while (N--) {
+        memcpy(&in1, in1_ptr, sizeof(npy_bool));
+        if (backend == BACKEND_SLEEF)
+            in1_quad.sleef_value = Sleef_cast_from_int64q1(in1);
+        else
+            in1_quad.longdouble_value = static_cast<long double>(in1);
+        memcpy(&in2, in2_ptr, elem_size);
+        npy_bool result;
+
+        if (backend == BACKEND_SLEEF) {
+            result = sleef_comp(&in1_quad.sleef_value, &in2.sleef_value);
+        }
+        else {
+            result = ld_comp(&in1_quad.longdouble_value, &in2.longdouble_value);
+        }
+
+        memcpy(out_ptr, &result, sizeof(npy_bool));
+
+        in1_ptr += in1_stride;
+        in2_ptr += in2_stride;
+        out_ptr += out_stride;
+    }
+    return 0;
+}
+
 
 NPY_NO_EXPORT int
 comparison_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
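
Why the new loops take a bool as their first input: for a binary comparison ufunc, reduce feeds the boolean result of one step back in as the left operand of the next (r = op(op(a[0], a[1]), a[2]), and so on), so the inner loop needs the signature (Bool, Quad) -> Bool, with the accumulator cast back to a quad value (via Sleef_cast_from_int64q1 or static_cast<long double>) before each comparison. A minimal pure-Python model of that accumulator flow, using plain floats in place of quad values and a hypothetical helper name (less_reduce is not part of this package):

    # Models the (Bool, Quad) -> Bool reduce loop: the bool accumulator is
    # cast to 0.0/1.0 before each comparison, as the C++ loops above do.
    def less_reduce(values):
        acc = values[0] < values[1]      # first step: (quad, quad) -> bool
        for v in values[2:]:
            acc = float(acc) < v         # later steps: (bool, quad) -> bool
        return acc

    print(less_reduce([1.0, 2.0, 3.0]))  # True: 1 < 2, then 1.0 < 3.0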
@@ -194,13 +306,38 @@ create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name)
         return -1;
     }
 
+    // registering the reduce methods
+    PyArray_DTypeMeta *dtypes_reduce[3] = {&PyArray_BoolDType, &QuadPrecDType, &PyArray_BoolDType};
+
+    PyType_Slot slots_reduce[] = {
+            {NPY_METH_resolve_descriptors, (void *)&quad_comparison_reduce_resolve_descriptors},
+            {NPY_METH_strided_loop,
+             (void *)&quad_reduce_comp_strided_loop_unaligned<sleef_comp, ld_comp>},
+            {NPY_METH_unaligned_strided_loop,
+             (void *)&quad_reduce_comp_strided_loop_unaligned<sleef_comp, ld_comp>},
+            {0, NULL}};
+
+    PyArrayMethod_Spec Spec_reduce = {
+            .name = "quad_comp",
+            .nin = 2,
+            .nout = 1,
+            .casting = NPY_SAFE_CASTING,
+            .flags = NPY_METH_SUPPORTS_UNALIGNED,
+            .dtypes = dtypes_reduce,
+            .slots = slots_reduce,
+    };
+
+    if (PyUFunc_AddLoopFromSpec(ufunc, &Spec_reduce) < 0) {
+        return -1;
+    }
+
     PyObject *promoter_capsule =
             PyCapsule_New((void *)&comparison_ufunc_promoter, "numpy._ufunc_promoter", NULL);
     if (promoter_capsule == NULL) {
         return -1;
     }
 
-    PyObject *DTypes = PyTuple_Pack(3, Py_None, Py_None, Py_None);
+    PyObject *DTypes = PyTuple_Pack(3, &PyArrayDescr_Type, &PyArrayDescr_Type, &PyArray_BoolDType);
     if (DTypes == 0) {
         Py_DECREF(promoter_capsule);
         return -1;
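
From the Python side, the intent of the registered Spec_reduce is to let comparison reductions resolve on quad arrays. A sketch of the kind of call this is meant to support, assuming a built numpy_quaddtype that exports QuadPrecDType (as this repo's package does); the snippet is illustrative, not a test from this commit:

    import numpy as np
    from numpy_quaddtype import QuadPrecDType

    a = np.array([1, 2, 3], dtype=QuadPrecDType())

    # np.less.reduce threads a bool accumulator through quad elements; it can
    # only resolve once a (Bool, Quad) -> Bool inner loop is registered.
    print(np.less.reduce(a))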
