Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 1 addition & 19 deletions Common/include/CConfig.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
#include "option_structure.hpp"
#include "containers/container_decorators.hpp"
#include "toolboxes/printing_toolbox.hpp"
#include "tracy_structure.hpp"

#ifdef HAVE_CGNS
#include "cgnslib.h"
Expand Down Expand Up @@ -9097,25 +9098,6 @@ class CConfig {
*/
unsigned long GetNonphysical_Reconstr(void) const { return Nonphys_Reconstr; }

/*!
* \brief Start the timer for profiling subroutines.
* \note Has no effect unless the code is compiled with PROFILE defined.
* \param[out] val_start_time - pointer to the variable that receives the start time.
*/
void Tick(double *val_start_time);

/*!
* \brief Stop the timer for profiling subroutines and store results.
* \param[in] val_start_time - the value of the start time.
* \param[in] val_function_name - string for the name of the profiled subroutine.
* \param[in] val_group_id - integer group ID associated with the profiled subroutine.
*/
void Tock(double val_start_time, const string& val_function_name, int val_group_id);

/*!
* \brief Write a CSV file ("profiling.csv") containing the results of the profiling.
* \note Has no effect unless the code is compiled with PROFILE defined; only the master rank writes the file.
*/
void SetProfilingCSV(void);

/*!
* \brief Start the timer for profiling subroutines.
* \param[in] val_start_time - the value of the start time.
Expand Down
2 changes: 2 additions & 0 deletions Common/include/linear_algebra/CSysSolve.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ class CSysSolve {
*/
template <class OtherType>
void HandleTemporariesIn(const CSysVector<OtherType>& LinSysRes, CSysVector<OtherType>& LinSysSol) {
SU2_ZONE_SCOPED
if constexpr (std::is_same_v<ScalarType, OtherType>) {
/*--- Same type specialization, temporary variables are not required. ---*/
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
Expand Down Expand Up @@ -273,6 +274,7 @@ class CSysSolve {
*/
template <class OtherType>
void HandleTemporariesOut(CSysVector<OtherType>& LinSysSol) {
SU2_ZONE_SCOPED
if constexpr (std::is_same_v<ScalarType, OtherType>) {
/*--- Same type specialization, temporary variables are not required. ---*/
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
Expand Down
12 changes: 8 additions & 4 deletions Common/include/tracy_structure.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
* they can be completely "disabled" when compiling without tracy.
* \note Do not include tracy headers explicitly anywhere, use this header instead.
* \note To enable tracy, define the TRACY_ENABLE macro during compilation.
* \author Divyaprakash
* \version 8.4.0 "Harrier"
*
* SU2 Project Website: https://su2code.github.io
Expand All @@ -32,11 +31,16 @@

#pragma once

#ifdef HAVE_TRACY
#ifdef TRACY_ENABLE
#include "tracy/Tracy.hpp"
#define SU2_ZONE_SCOPED ZoneScoped
#define SU2_ZONE_SCOPED_N(name) ZoneScopedN(name)
#define SU2_ZONE_SCOPED ZoneScoped;
#define SU2_ZONE_SCOPED_N(name) ZoneScopedN(name);
#else
#define SU2_ZONE_SCOPED
#define SU2_ZONE_SCOPED_N(name)
#endif

#define BEGIN_SU2_ZONE_N(name) \
{ \
SU2_ZONE_SCOPED_N(name)
#define END_SU2_ZONE }
192 changes: 0 additions & 192 deletions Common/src/CConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,6 @@ using namespace PrintingToolbox;
#endif
#endif

vector<string> Profile_Function_tp; /*!< \brief Vector of string names for profiled functions (one entry per recorded call). */
vector<double> Profile_Time_tp; /*!< \brief Vector of elapsed time for profiled functions (one entry per recorded call). */
vector<double> Profile_ID_tp; /*!< \brief Vector of group ID number for profiled functions (stored as double, written out as int). */
map<string, vector<int> > Profile_Map_tp; /*!< \brief Map from function name to the indices of its entries in the Profile_*_tp vectors. */

map<CLong3T, int> GEMM_Profile_MNK; /*!< \brief Map, which maps the GEMM size to the index where
the data for this GEMM is stored in several vectors. */
vector<long> GEMM_Profile_NCalls; /*!< \brief Vector, which stores the number of calls to this
Expand All @@ -59,8 +54,6 @@ vector<double> GEMM_Profile_TotTime; /*!< \brief Total time spent for this
vector<double> GEMM_Profile_MinTime; /*!< \brief Minimum time spent for this GEMM size. */
vector<double> GEMM_Profile_MaxTime; /*!< \brief Maximum time spent for this GEMM size. */

//#pragma omp threadprivate(Profile_Function_tp, Profile_Time_tp, Profile_ID_tp, Profile_Map_tp)


CConfig::CConfig(char case_filename[MAX_STRING_SIZE], SU2_COMPONENT val_software, bool verb_high) {

Expand Down Expand Up @@ -9953,191 +9946,6 @@ short CConfig::FindInterfaceMarker(unsigned short iInterface) const {
return -1;
}

/*--- Store the current wall-clock time in the caller-provided variable.
      Compiles to an empty function unless PROFILE is defined. ---*/
void CConfig::Tick(double *val_start_time) {
#ifdef PROFILE
  val_start_time[0] = SU2_MPI::Wtime();
#endif
}

/*--- Record one profiling sample: the time elapsed since val_start_time,
      together with the subroutine name and its group ID.
      Compiles to an empty function unless PROFILE is defined. ---*/
void CConfig::Tock(double val_start_time, const string& val_function_name, int val_group_id) {
#ifdef PROFILE
  /*--- Elapsed wall-clock time for this subroutine ---*/
  const double elapsed = SU2_MPI::Wtime() - val_start_time;

  /*--- Append the sample to the three parallel profiling vectors ---*/
  Profile_Function_tp.push_back(val_function_name);
  Profile_Time_tp.push_back(elapsed);
  Profile_ID_tp.push_back(val_group_id);
#endif
}

/*--- Aggregate the samples recorded by Tock() per function name, reduce the
      statistics over all ranks and let the master rank write them to
      "profiling.csv". Compiles to an empty function unless PROFILE is defined.

      All scratch buffers are std::vector so that no manual delete[] is
      required on any path out of this function. ---*/
void CConfig::SetProfilingCSV() {

#ifdef PROFILE

  int rank = MASTER_NODE;
  int size = SINGLE_NODE;
#ifdef HAVE_MPI
  SU2_MPI::Comm_rank(SU2_MPI::GetComm(), &rank);
  SU2_MPI::Comm_size(SU2_MPI::GetComm(), &size);
#endif

  /*--- Each rank has the same stack trace, so they have the same
        function calls and ordering in the vectors. We're going to reduce
        the timings from each rank and extract the avg, min, and max timings. ---*/

  /*--- First, create a local mapping from function name to the indices of
        its samples. operator[] creates the (empty) index list the first
        time a name is seen, so no explicit find/insert is needed. ---*/

  for (unsigned int i = 0; i < Profile_Function_tp.size(); i++) {
    Profile_Map_tp[Profile_Function_tp[i]].push_back(i);
  }

  /*--- Number of distinct profiled functions ---*/

  const int map_size = static_cast<int>(Profile_Map_tp.size());

  /*--- Allocate and initialize the per-function local statistics ---*/

  vector<double> l_min(map_size, 1e10);
  vector<double> l_max(map_size, 0.0);
  vector<double> l_tot(map_size, 0.0);
  vector<double> l_avg(map_size, 0.0);
  vector<int> n_calls(map_size, 0);

  /*--- Collect the info for each function from the current rank. The map
        is ordered, so func_counter enumerates the functions in the same
        order here and when writing the file below. ---*/

  int func_counter = 0;
  for (const auto& entry : Profile_Map_tp) {

    for (const int iSample : entry.second) {
      const double sample_time = Profile_Time_tp[iSample];
      n_calls[func_counter]++;
      l_tot[func_counter] += sample_time;
      if (sample_time < l_min[func_counter]) l_min[func_counter] = sample_time;
      if (sample_time > l_max[func_counter]) l_max[func_counter] = sample_time;
    }
    l_avg[func_counter] = l_tot[func_counter] / static_cast<double>(n_calls[func_counter]);
    func_counter++;
  }

  /*--- Now reduce the data. Receive buffers are only sized on the master
        rank; they stay empty (and are ignored by MPI_Reduce) elsewhere. ---*/

  vector<double> l_min_red, l_max_red, l_tot_red, l_avg_red;
  vector<int> n_calls_red;
  if (rank == MASTER_NODE) {
    l_min_red.resize(map_size);
    l_max_red.resize(map_size);
    l_tot_red.resize(map_size);
    l_avg_red.resize(map_size);
    n_calls_red.resize(map_size);
  }

#ifdef HAVE_MPI
  MPI_Reduce(n_calls.data(), n_calls_red.data(), map_size, MPI_INT, MPI_SUM, MASTER_NODE, SU2_MPI::GetComm());
  MPI_Reduce(l_tot.data(), l_tot_red.data(), map_size, MPI_DOUBLE, MPI_SUM, MASTER_NODE, SU2_MPI::GetComm());
  MPI_Reduce(l_avg.data(), l_avg_red.data(), map_size, MPI_DOUBLE, MPI_SUM, MASTER_NODE, SU2_MPI::GetComm());
  MPI_Reduce(l_min.data(), l_min_red.data(), map_size, MPI_DOUBLE, MPI_MIN, MASTER_NODE, SU2_MPI::GetComm());
  MPI_Reduce(l_max.data(), l_max_red.data(), map_size, MPI_DOUBLE, MPI_MAX, MASTER_NODE, SU2_MPI::GetComm());
#else
  /*--- Single process: the "reduced" values are just the local ones ---*/
  n_calls_red = n_calls;
  l_tot_red = l_tot;
  l_avg_red = l_avg;
  l_min_red = l_min;
  l_max_red = l_max;
#endif

  /*--- The master rank will write the file ---*/

  if (rank == MASTER_NODE) {

    /*--- Take averages over all ranks on the master ---*/

    for (int i = 0; i < map_size; i++) {
      l_tot_red[i] = l_tot_red[i] / static_cast<double>(size);
      l_avg_red[i] = l_avg_red[i] / static_cast<double>(size);
      n_calls_red[i] = n_calls_red[i] / size;
    }

    /*--- Now write a CSV file with the processed results ---*/

    ofstream Profile_File;
    Profile_File.precision(15);
    Profile_File.open("profiling.csv");

    /*--- Create the CSV header ---*/

    Profile_File << "\"Function_Name\", \"N_Calls\", \"Avg_Total_Time\", \"Avg_Time\", \"Min_Time\", \"Max_Time\", \"Function_ID\"" << '\n';

    /*--- Loop through the map and write the results to the file. The group
          ID is taken from the first recorded sample of each function. ---*/

    func_counter = 0;
    for (const auto& entry : Profile_Map_tp) {

      Profile_File << scientific << entry.first << ", " << n_calls_red[func_counter] << ", "
                   << l_tot_red[func_counter] << ", " << l_avg_red[func_counter] << ", "
                   << l_min_red[func_counter] << ", " << l_max_red[func_counter] << ", "
                   << static_cast<int>(Profile_ID_tp[entry.second[0]]) << '\n';
      func_counter++;
    }

    Profile_File.close();

  }

#endif

}

void CConfig::GEMM_Tick(double *val_start_time) const {

#ifdef PROFILE
Expand Down
Loading
Loading