Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,10 @@ if (WITH_TEST_RUNTIME)
add_subdirectory(runtime)
endif ()

# FIXME: failing_with_issue is dead code :)
Halide_feature(WITH_TEST_FAILING_WITH_ISSUE "Build known-failing tests" OFF)
if (WITH_TEST_FAILING_WITH_ISSUE)
add_subdirectory(failing_with_issue)
endif ()

Halide_feature(WITH_TEST_FUZZ "Build fuzz tests" ON)
if (WITH_TEST_FUZZ)
Expand Down
1 change: 1 addition & 0 deletions test/correctness/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,7 @@ tests(GROUPS correctness
vectorize_guard_with_if.cpp
vectorize_mixed_widths.cpp
vectorize_nested.cpp
vectorize_pred.cpp
vectorize_varying_allocation_size.cpp
vectorized_assert.cpp
vectorized_gpu_allocation.cpp
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
#include "Halide.h"
#include "halide_benchmark.h"
#include <cstdio>

using namespace Halide;
using namespace Halide::Tools;

template<typename T>
T tolerance() {
Expand All @@ -15,10 +13,6 @@ float tolerance<float>() {
return 1e-7f;
}

// Full specialization of tolerance() for double. equals() uses
// tolerance<T>() as the default value of its epsilon parameter, so this is
// the default epsilon when T = double.
template<>
double tolerance<double>() {
return 1e-14;
}

template<typename T>
bool equals(T a, T b, T epsilon = tolerance<T>()) {
Expand All @@ -29,8 +23,8 @@ bool equals(T a, T b, T epsilon = tolerance<T>()) {
template<typename A>
bool test(int vec_width) {

int W = vec_width * 1;
int H = 50000;
int W = vec_width * 4;
int H = 1000;

Buffer<A> input(W, H + 20);
for (int y = 0; y < H + 20; y++) {
Expand All @@ -39,8 +33,8 @@ bool test(int vec_width) {
}
}

Var x, y;
Func f, g;
Var x("x"), y("y");
Func f("f"), g("g");

RDom r(0, W, 0, H);
r.where((r.x * r.y) % 8 < 7);
Expand All @@ -54,50 +48,33 @@ bool test(int vec_width) {
e = e + input(r.x, r.y + i);
}

f(x, y) = undef<A>();
f(x, y) = cast<A>(0);
f(r.x, r.y) = e;
g(x, y) = undef<A>();
g(x, y) = cast<A>(0);
g(r.x, r.y) = e;
f.update(0).vectorize(r.x);

Buffer<A> outputg = g.realize({W, H});
Buffer<A> outputf = f.realize({W, H});

double t_g = benchmark([&]() {
g.realize(outputg);
});
double t_f = benchmark([&]() {
f.realize(outputf);
});

for (int y = 0; y < H; y++) {
for (int x = 0; x < W; x++) {
if (!equals(outputf(x, y), outputg(x, y))) {
for (int j = 0; j < H; j++) {
for (int i = 0; i < W; i++) {
if (!equals(outputf(i, j), outputg(i, j))) {
std::cout << type_of<A>() << " x " << vec_width << " failed at "
<< x << " " << y << ": "
<< outputf(x, y) << " vs " << outputg(x, y) << "\n"
<< i << " " << j << ": "
<< outputf(i, j) << " vs " << outputg(i, j) << "\n"
<< "Failure!\n";
exit(1);
return false;
}
}
}

printf("Vectorized vs scalar (%s x %d): %1.3gms %1.3gms. Speedup = %1.3f\n",
string_of_type<A>(), vec_width, t_f * 1e3, t_g * 1e3, t_g / t_f);

if (t_f > t_g) {
return false;
}

return true;
}

int main(int argc, char **argv) {
// For now, predicated stores/loads are only vectorized on Hexagon, or on
// x86 when the value is a 32-bit type and there are at least 4 lanes.
test<float>(4);
test<float>(8);
if (!test<float>(4)) return 1;
if (!test<float>(8)) return 1;

printf("Success!\n");
return 0;
Expand Down
7 changes: 4 additions & 3 deletions test/failing_with_issue/3292_async_specialize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@ using namespace Halide;

size_t custom_malloc_size = 0;

void *my_malloc(void *user_context, size_t x) {
void *my_malloc(JITUserContext *user_context, size_t x) {
custom_malloc_size = x;
void *orig = malloc(x + 32);
void *ptr = (void *)((((size_t)orig + 32) >> 5) << 5);
((void **)ptr)[-1] = orig;
return ptr;
}

void my_free(void *user_context, void *ptr) {
void my_free(JITUserContext *user_context, void *ptr) {
free(((void **)ptr)[-1]);
}

Expand Down Expand Up @@ -49,7 +49,8 @@ int main(int argc, char **argv) {
// automatic storage folding refused to fold this (the case
// above).

g.set_custom_allocator(my_malloc, my_free);
g.jit_handlers().custom_malloc = my_malloc;
g.jit_handlers().custom_free = my_free;

Buffer<int> im = g.realize({100, 1000});

Expand Down
7 changes: 4 additions & 3 deletions test/failing_with_issue/3293_storage_folding_async.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@ using namespace Halide;

size_t custom_malloc_size = 0;

void *my_malloc(void *user_context, size_t x) {
void *my_malloc(JITUserContext *user_context, size_t x) {
custom_malloc_size = x;
void *orig = malloc(x + 32);
void *ptr = (void *)((((size_t)orig + 32) >> 5) << 5);
((void **)ptr)[-1] = orig;
return ptr;
}

void my_free(void *user_context, void *ptr) {
void my_free(JITUserContext *user_context, void *ptr) {
free(((void **)ptr)[-1]);
}

Expand Down Expand Up @@ -51,7 +51,8 @@ int main(int argc, char **argv) {
// automatic storage folding refused to fold this (the case
// above).

h.set_custom_allocator(my_malloc, my_free);
h.jit_handlers().custom_malloc = my_malloc;
h.jit_handlers().custom_free = my_free;

Buffer<int> im = h.realize({100, 1000});

Expand Down
5 changes: 3 additions & 2 deletions test/failing_with_issue/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
tests(GROUPS failing_with_issue
EXPECT_FAILURE
SOURCES
3292_async_specialize.cpp
3293_storage_folding_async.cpp
3357_vectorize_pred.cpp
4283_store_at_gpu.cpp
# 3357_vectorize_pred.cpp # moved to test/correctness/vectorize_pred.cpp
# 4283_store_at_gpu.cpp # TODO: test file missing, tracked in issue #4283
)