Skip to content

Commit c6e6991

Browse files
committed
Start on tuning suite
1 parent 54c5455 commit c6e6991

File tree

17 files changed

+946
-30
lines changed

17 files changed

+946
-30
lines changed

Makefile.in

Lines changed: 53 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,11 @@
1212
FLINT_DIR:=.
1313
SRC_DIR:=src
1414
BUILD_DIR:=build
15+
TUNE_DIR:=src/tune
1516
ABS_FLINT_DIR:='$(patsubst %/,%, $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))'
1617
ABS_SRC_DIR:=$(ABS_FLINT_DIR)/$(SRC_DIR)
1718
# Absolute path of the build directory (derived from BUILD_DIR, not SRC_DIR).
ABS_BUILD_DIR:=$(ABS_FLINT_DIR)/$(BUILD_DIR)
19+
ABS_TUNE_DIR:=$(ABS_FLINT_DIR)/$(TUNE_DIR)
1820

1921
FLINT_VERSION:=@FLINT_VERSION_FULL@
2022
FLINT_MAJOR_SO:=@FLINT_MAJOR_SO@
@@ -81,6 +83,7 @@ CFLAGS:=@CFLAGS@
8183
TESTCFLAGS:=@TESTCFLAGS@
8284
CPPFLAGS:=@CPPFLAGS@ -DBUILDING_FLINT
8385
CPPFLAGS2:=-L$(FLINT_DIR) $(CPPFLAGS)
86+
CPPFLAGS3:=-I$(TUNE_DIR) $(CPPFLAGS2)
8487
LIB_CPPFLAGS:=@LIB_CPPFLAGS@
8588
CXXFLAGS:=@CXXFLAGS@
8689
LIBS:=@LIBS@
@@ -218,8 +221,15 @@ TEMPLATE_DIRS := \
218221
fq_poly_templates fq_poly_factor_templates \
219222
fq_embed_templates fq_templates
220223

224+
_TUNE_DIRS := \
225+
ulong_extras
226+
227+
TUNE_DIRS := $(patsubst %, $(TUNE_DIR)/%, $(_TUNE_DIRS))
228+
TUNE_BUILD_DIRS := $(patsubst %, $(BUILD_DIR)/tune/%, $(_TUNE_DIRS))
229+
221230
BUILD_DIRS := \
222231
$(BUILD_DIR) \
232+
$(TUNE_BUILD_DIRS) \
223233
$(patsubst %, $(BUILD_DIR)/%, $(DIRS)) \
224234
$(patsubst %, $(BUILD_DIR)/%/profile, $(DIRS)) \
225235
$(patsubst %, $(BUILD_DIR)/%/test, $(DIRS)) \
@@ -299,12 +309,12 @@ ifneq ($(WANT_NTL), 0)
299309
interfaces_TEST_SOURCES := $(SRC_DIR)/interfaces/test/t-NTL-interface.cpp
300310
endif
301311

302-
define xxx_TUNE_SOURCES
303-
$(1)_TUNE_SOURCES := $(wildcard $(SRC_DIR)/$(1)/tune/*.c)
312+
define xxx_OLD_TUNE_SOURCES
313+
$(1)_OLD_TUNE_SOURCES := $(wildcard $(SRC_DIR)/$(1)/tune/*.c)
304314
endef
305-
_TUNE_SOURCES := $(wildcard $(SRC_DIR)/tune/*.c)
306-
$(foreach dir, $(DIRS), $(eval $(call xxx_TUNE_SOURCES,$(dir))))
307-
TUNE_SOURCES := $(foreach dir,$(DIRS),$($(dir)_TUNE_SOURCES)) $(_TUNE_SOURCES)
315+
_OLD_TUNE_SOURCES := $(wildcard $(SRC_DIR)/tune/*.c)
316+
$(foreach dir, $(DIRS), $(eval $(call xxx_OLD_TUNE_SOURCES,$(dir))))
317+
OLD_TUNE_SOURCES := $(foreach dir,$(DIRS),$($(dir)_OLD_TUNE_SOURCES)) $(_OLD_TUNE_SOURCES)
308318

309319
EXMP_SOURCES := $(wildcard $(FLINT_DIR)/examples/*.c)
310320

@@ -359,12 +369,12 @@ interfaces_TESTS := $(BUILD_DIR)/interfaces/test/t-NTL-interface$(EXEEXT)
359369
endif
360370
TESTS := $(_TESTS) $(foreach dir,$(DIRS),$($(dir)_TESTS)) $(interfaces_TESTS)
361371

362-
define xxx_TUNES
363-
$(1)_TUNES := $(patsubst $(SRC_DIR)/%.c,$(BUILD_DIR)/%$(EXEEXT),$($(1)_TUNE_SOURCES))
372+
define xxx_OLD_TUNES
373+
$(1)_OLD_TUNES := $(patsubst $(SRC_DIR)/%.c,$(BUILD_DIR)/%$(EXEEXT),$($(1)_OLD_TUNE_SOURCES))
364374
endef
365-
_TUNES := $(patsubst $(SRC_DIR)/%.c,$(BUILD_DIR)/%$(EXEEXT),$(_TUNE_SOURCES))
366-
$(foreach dir, $(DIRS), $(eval $(call xxx_TUNES,$(dir))))
367-
TUNES := $(foreach dir,$(DIRS),$($(dir)_TUNES)) $(_TUNES)
375+
_OLD_TUNES := $(patsubst $(SRC_DIR)/%.c,$(BUILD_DIR)/%$(EXEEXT),$(_OLD_TUNE_SOURCES))
376+
$(foreach dir, $(DIRS), $(eval $(call xxx_OLD_TUNES,$(dir))))
377+
OLD_TUNES := $(foreach dir,$(DIRS),$($(dir)_OLD_TUNES)) $(_OLD_TUNES)
368378

369379
EXMPS := $(patsubst $(FLINT_DIR)/%.c,$(BUILD_DIR)/%$(EXEEXT),$(EXMP_SOURCES))
370380

@@ -675,30 +685,20 @@ endif
675685
endif
676686

677687
ifeq ($(SHARED), 0)
678-
$(BUILD_DIR)/tune/%$(EXEEXT): $(SRC_DIR)/tune/%.c $(FLINT_DIR)/$(FLINT_LIB_STATIC) | $(BUILD_DIR)/tune
679-
@echo " CC $(<:$(SRC_DIR)/%=%)"
680-
@$(CC) $(TESTCFLAGS) $(CPPFLAGS2) $< -o $@ $(EXE_LDFLAGS) $(LIBS2) $(DEPFLAGS)
681-
else
682-
$(BUILD_DIR)/tune/%$(EXEEXT): $(SRC_DIR)/tune/%.c | $(FLINT_DIR)/$(FLINT_LIB_FULL) $(BUILD_DIR)/tune
683-
@echo " CC $(<:$(SRC_DIR)/%=%)"
684-
@$(CC) $(TESTCFLAGS) $(CPPFLAGS2) $< -o $@ $(EXE_LDFLAGS) $(LIBS2) $(DEPFLAGS)
685-
endif
686-
687-
ifeq ($(SHARED), 0)
688-
define xxx_TUNES_rule
688+
define xxx_OLD_TUNES_rule
689689
$(BUILD_DIR)/$(1)/tune/%$(EXEEXT): $(SRC_DIR)/$(1)/tune/%.c $(FLINT_DIR)/$(FLINT_LIB_STATIC) | $(BUILD_DIR)/$(1)/tune
690690
@echo " CC $$(<:$(SRC_DIR)/%=%)"
691691
@$(CC) $(TESTCFLAGS) $(CPPFLAGS2) $$< -o $$@ $(EXE_LDFLAGS) $(LIBS2) $$(DEPFLAGS)
692692
endef
693693
else
694-
define xxx_TUNES_rule
694+
define xxx_OLD_TUNES_rule
695695
$(BUILD_DIR)/$(1)/tune/%$(EXEEXT): $(SRC_DIR)/$(1)/tune/%.c | $(FLINT_DIR)/$(FLINT_LIB_FULL) $(BUILD_DIR)/$(1)/tune
696696
@echo " CC $$(<:$(SRC_DIR)/%=%)"
697697
@$(CC) $(TESTCFLAGS) $(CPPFLAGS2) $$< -o $$@ $(EXE_LDFLAGS) $(LIBS2) $$(DEPFLAGS)
698698
endef
699699
endif
700700

701-
$(foreach dir, $(DIRS), $(eval $(call xxx_TUNES_rule,$(dir))))
701+
$(foreach dir, $(DIRS), $(eval $(call xxx_OLD_TUNES_rule,$(dir))))
702702

703703
ifeq ($(SHARED), 0)
704704
$(BUILD_DIR)/examples/%$(EXEEXT): $(FLINT_DIR)/examples/%.c $(FLINT_DIR)/$(FLINT_LIB_STATIC) | $(BUILD_DIR)/examples $(BUILD_DIR)/include
@@ -834,7 +834,35 @@ endif
834834
# tuning
835835
################################################################################
836836

837-
tune: library $(TUNES)
837+
# Only old tunes, i.e. under src/MOD/tune/
838+
old_tune: library $(OLD_TUNES)
839+
840+
TUNE_SOURCE:=$(TUNE_DIR)/tune.c
841+
TUNE_DEPS_SOURCES:=$(foreach dir,$(TUNE_DIRS),$(wildcard $(dir)/*.c))
842+
843+
_TUNE_HEADERS:=tune.h clock.h
844+
TUNE_HEADERS:=$(patsubst %,$(TUNE_DIR)/%,$(_TUNE_HEADERS))
845+
846+
TUNE_DEPS_OBJS:=$(patsubst $(TUNE_DIR)/%.c,$(BUILD_DIR)/tune/%.o,$(TUNE_DEPS_SOURCES))
847+
848+
TUNE_EXE:=$(BUILD_DIR)/tuneup$(EXEEXT)
849+
850+
# Sloppy to say that all TUNE_BUILD_DIRS have to be built.
851+
$(BUILD_DIR)/tune/%.o: $(TUNE_DIR)/%.c $(TUNE_HEADERS) | $(TUNE_BUILD_DIRS)
852+
@echo " CC $(<:$(SRC_DIR)/%=%)"
853+
@$(CC) $(CFLAGS) $(CPPFLAGS3) $(LIB_CPPFLAGS) -c $< -o $@ $(DEPFLAGS)
854+
855+
ifeq ($(SHARED), 0)
856+
$(TUNE_EXE): $(TUNE_SOURCE) $(TUNE_DEPS_OBJS) $(TUNE_HEADERS) $(FLINT_DIR)/$(FLINT_LIB_STATIC) | $(BUILD_DIR)
857+
@echo " CC $(<:$(SRC_DIR)/%=%)"
858+
@$(CC) $(CFLAGS) $(CPPFLAGS3) $(LIB_CPPFLAGS) $(TUNE_DEPS_OBJS) $< -o $@ $(EXE_LDFLAGS) $(LIBS2) $(DEPFLAGS)
859+
else
860+
$(TUNE_EXE): $(TUNE_SOURCE) $(TUNE_DEPS_OBJS) $(TUNE_HEADERS) | $(FLINT_DIR)/$(FLINT_LIB_FULL) $(BUILD_DIR)
861+
@echo " CC $(<:$(SRC_DIR)/%=%)"
862+
@$(CC) $(CFLAGS) $(CPPFLAGS3) $(LIB_CPPFLAGS) $(TUNE_DEPS_OBJS) $< -o $@ $(EXE_LDFLAGS) $(LIBS2) $(DEPFLAGS)
863+
endif
864+
865+
tune: $(TUNE_EXE)
838866

839867
################################################################################
840868
# valgrind
@@ -988,5 +1016,5 @@ dist:
9881016
print-%:
9891017
@echo "$*=$($*)"
9901018

991-
.PHONY: all library shared static examples checkexamples profile tests check tune valgrind clean distclean install uninstall dist %_TEST_RUN %_TEST_RUN_% %_TEST_DGB_RUN_ARGS %_VALGRIND_RUN print-% coverage coverage_html debug
1019+
.PHONY: all library shared static examples checkexamples profile tests check tune old_tune valgrind clean distclean install uninstall dist %_TEST_RUN %_TEST_RUN_% %_TEST_DGB_RUN_ARGS %_VALGRIND_RUN print-% coverage coverage_html debug
9921020
.PRECIOUS: $(mpn_extras_PIC_S_SOURCES) $(mpn_extras_S_SOURCES)

src/limb_types.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
extern "C" {
1919
#endif
2020

21+
typedef struct { ulong m0, m1; } nn_pair_t;
22+
2123
#define FLINT_MAX_FACTORS_IN_LIMB 15
2224

2325
typedef struct

src/tune/README.md

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# Tuning-suite
2+
3+
Currently a work in progress, but feedback is much appreciated.
4+
5+
## Usage
6+
7+
Run `make tune` followed by `./build/tuneup`. This pushes all the optimized
8+
parameters into `stdout`. These can then be used to optimize the parameter file
9+
`flint-mparam.h` for your system.
10+
11+
### Set CPU frequency
12+
13+
If the tuner is using clock ticks (currently only on x86-64), you can specify your
14+
clock frequency by pushing `export FLINT_CPU_FREQUENCY=3.1e9` to tell the tuner
15+
that your CPU frequency is 3.1 GHz.
16+
17+
### Options
18+
19+
Currently, no command-line options are allowed apart from `-h` and `--help` to
20+
display the usual help message.
21+
22+
However, it would be optimal to be able to specify:
23+
24+
- Functions intended to benchmark (currently does all available)
25+
- Minimum number of runs
26+
- Warmup runs (currently, 10 is the default)
27+
- Minimum amount of time to run each function (?)
28+
- Precision required to terminate successfully (currently 1.25 %)
29+
- Percentage of runs required to be within said precision (currently 13.5 %)
30+
31+
## Issues
32+
33+
Please open up any issues at <https://github.com/flintlib/flint/issues>.
34+
35+
## Requirements
36+
37+
- FLINT was built with Autotools.
38+
- Either that
39+
* `clock_gettime` is available on the system, or that
40+
* the compiler is GCC-compatible and the architecture is x86-64.
41+
42+
## How it works
43+
44+
The program works in the following order:
45+
46+
1. Parses options
47+
2. Sets default values
48+
3. For each function (that is, each variant of each function) tested:
49+
a. Run a couple of warmups, which are trashed
50+
b. Run hotlaps, of which the time is saved
51+
c. Check if there is a smallest time elapsed $t$ for running a function of
52+
which at least $k$ of the runs have a time in the interval
53+
$[t, (1 + p) t]$, where $p$ is the precision and $k / n$ is the percentage
54+
of runs required to be within said precision, where $n$ is the total
55+
number of runs. If no such $t$ was found, abort.
56+
4. With all $t$ obtained from each family of functions, determine cutoff
57+
points, methods used, etc.
58+
5. Print the associated `#define` into `stdout`.

src/tune/clock.h

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/*
2+
Copyright (C) 2024 Albin Ahlbäck
3+
4+
This file is part of FLINT.
5+
6+
FLINT is free software: you can redistribute it and/or modify it under
7+
the terms of the GNU Lesser General Public License (LGPL) as published
8+
by the Free Software Foundation; either version 3 of the License, or
9+
(at your option) any later version. See <https://www.gnu.org/licenses/>.
10+
*/
11+
12+
#ifndef FLINT_CLOCK_H
13+
#define FLINT_CLOCK_H
14+
15+
#include <stdlib.h>
16+
#include <time.h>
17+
#include "flint.h"
18+
19+
#define FLINT_CPU_FREQUENCY_DEFAULT 3.2e9
20+
21+
#if FLINT64 && defined(__amd64__)
22+
typedef ulong flint_time_t[1];
23+
24+
FLINT_FORCE_INLINE
25+
double flint_time_nsec_diff(flint_time_t t1, flint_time_t t0)
26+
{
27+
char * str = getenv("FLINT_CPU_FREQUENCY");
28+
double freq;
29+
double seconds;
30+
31+
if (str == NULL)
32+
freq = FLINT_CPU_FREQUENCY_DEFAULT;
33+
else
34+
freq = strtod(str, NULL);
35+
36+
seconds = (double) (*t1 - *t0) / freq;
37+
38+
return seconds * 10e9;
39+
}
40+
41+
FLINT_FORCE_INLINE
42+
void flint_time_get(flint_time_t t0)
43+
{
44+
__asm__ volatile (
45+
"rdtsc\n\t"
46+
"shl $32, %%rdx\n\t"
47+
"or %%rdx, %0"
48+
: "=a" (*t0) : : "rdx");
49+
}
50+
#else
51+
typedef struct timespec flint_time_t[1];
52+
53+
FLINT_FORCE_INLINE
54+
double flint_time_nsec_diff(flint_time_t t1, flint_time_t t0)
55+
{
56+
return 1000000000.0 * (t1->tv_sec - t0->tv_sec)
57+
+ (double) (t1->tv_nsec - t0->tv_nsec);
58+
}
59+
60+
FLINT_FORCE_INLINE
61+
void flint_time_get(flint_time_t t0)
62+
{
63+
return clock_gettime(CLOCK_PROCESS_CPUTIME_ID, t0);
64+
}
65+
#endif
66+
67+
#endif /* FLINT_CLOCK_H */

src/tune/n_mod_vec/aors_0.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/*
2+
Copyright (C) 2024 Albin Ahlbäck
3+
4+
This file is part of FLINT.
5+
6+
FLINT is free software: you can redistribute it and/or modify it under
7+
the terms of the GNU Lesser General Public License (LGPL) as published
8+
by the Free Software Foundation; either version 3 of the License, or
9+
(at your option) any later version. See <https://www.gnu.org/licenses/>.
10+
*/
11+
12+
#include "flint-mparam.h"
13+
#include "n_mod_vec.h"
14+
15+
#undef N_MOD_VEC_ADD_METHOD
16+
#undef N_MOD_VEC_SUB_METHOD
17+
#define TUNE_PROGRAM 1
18+
19+
#define N_MOD_VEC_ADD_METHOD 0
20+
#define N_MOD_VEC_SUB_METHOD 0
21+
22+
#define _n_mod_vec_add _n_mod_vec_add_0
23+
#define _n_mod_vec_sub _n_mod_vec_sub_0
24+
25+
#include "n_mod_vec/aors.c"

src/tune/n_mod_vec/aors_1.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/*
2+
Copyright (C) 2024 Albin Ahlbäck
3+
4+
This file is part of FLINT.
5+
6+
FLINT is free software: you can redistribute it and/or modify it under
7+
the terms of the GNU Lesser General Public License (LGPL) as published
8+
by the Free Software Foundation; either version 3 of the License, or
9+
(at your option) any later version. See <https://www.gnu.org/licenses/>.
10+
*/
11+
12+
#include "flint-mparam.h"
13+
#include "n_mod_vec.h"
14+
15+
#undef N_MOD_VEC_ADD_METHOD
16+
#undef N_MOD_VEC_SUB_METHOD
17+
#define TUNE_PROGRAM 1
18+
19+
#define N_MOD_VEC_ADD_METHOD 1
20+
#define N_MOD_VEC_SUB_METHOD 1
21+
22+
#define _n_mod_vec_add _n_mod_vec_add_1
23+
#define _n_mod_vec_sub _n_mod_vec_sub_1
24+
25+
#include "n_mod_vec/aors.c"

src/tune/n_mod_vec/param.c

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/*
2+
Copyright (C) 2024 Albin Ahlbäck
3+
4+
This file is part of FLINT.
5+
6+
FLINT is free software: you can redistribute it and/or modify it under
7+
the terms of the GNU Lesser General Public License (LGPL) as published
8+
by the Free Software Foundation; either version 3 of the License, or
9+
(at your option) any later version. See <https://www.gnu.org/licenses/>.
10+
*/
11+
12+
#include "tune.h"
13+
#include "n_mod.h"
14+
#include "n_mod_vec.h"
15+
16+
#if FLINT64
17+
# define N_0 UWORD(7365182178263871635)
18+
#else
19+
# define N_0 UWORD(1236571635)
20+
#endif
21+
22+
void * n_mod_vec_param_init_generate_0(void)
23+
{
24+
struct n_mod_vec_param_0 * param;
25+
nn_ptr rp, ap, bp;
26+
slong len;
27+
flint_rand_t state;
28+
29+
param = flint_malloc(sizeof(struct n_mod_vec_param_0));
30+
flint_rand_init(state);
31+
32+
len = 1000;
33+
rp = flint_malloc(sizeof(ulong) * len);
34+
ap = flint_malloc(sizeof(ulong) * len);
35+
bp = flint_malloc(sizeof(ulong) * len);
36+
n_mod_ctx_init(param->ctx, N_0);
37+
38+
_n_mod_vec_rand(ap, state, len, param->ctx);
39+
_n_mod_vec_rand(bp, state, len, param->ctx);
40+
41+
param->rp = rp;
42+
param->ap = ap;
43+
param->bp = bp;
44+
param->len = len;
45+
flint_rand_clear(state);
46+
47+
return param;
48+
}
49+
50+
void n_mod_vec_param_clear(void * vparam)
51+
{
52+
struct n_mod_vec_param_0 * param = vparam;
53+
54+
flint_free(param->rp);
55+
flint_free(param->ap);
56+
flint_free(param->bp);
57+
n_mod_ctx_clear(param->ctx);
58+
flint_free(param);
59+
}

0 commit comments

Comments
 (0)