88 lines
2.9 KiB
C++
88 lines
2.9 KiB
C++
#ifndef STRATEGY_HPP
|
|
#define STRATEGY_HPP
|
|
|
|
#include <omp.h>
#include <stdexcept>
#include <string>
#include <utility>
|
|
|
|
#ifdef ENABLE_EVENTIFY
|
|
#include <eventify/task_system.hxx>
|
|
#endif
|
|
|
|
// Parallelization strategies are defined here. Assumption for now: there is
|
|
// always an outer loop that can be parallelized. The strategies are templates
|
|
// instantiated when adding kernels to the kernel registry.
|
|
// Here, we only define the treatment of the outermost loop. The loop bodies
|
|
// are defined in kernels.cpp
|
|
|
|
namespace strategy {
|
|
|
|
// define concept to ensure that the loop bodies defined in kernels.cpp
|
|
// represent one invocable iteration of a parallel loop
|
|
template <typename Func>
|
|
concept invocable_with_int = requires(Func&& f, int i) {
|
|
{ std::forward<Func>(f)(i) }; // Checks if calling f(i) is valid
|
|
};
|
|
|
|
// for OpenMP, we just use the for pragma for the outermost loop
|
|
template <typename Func>
|
|
requires invocable_with_int<Func>
|
|
void openmp_strategy(int kernel_start_idx, int kernel_end_idx, int n_threads,
|
|
Func&& loop_body) {
|
|
omp_set_num_threads(static_cast<int>(n_threads));
|
|
|
|
#pragma omp parallel for schedule(static)
|
|
for (int i = kernel_start_idx; i < kernel_end_idx; ++i) {
|
|
loop_body(i);
|
|
}
|
|
}
|
|
|
|
#ifdef ENABLE_EVENTIFY
|
|
// for eventify, we calculate indices for evenly divided chunks of the outermost
|
|
// loop, create independent tasks and submit them to the tasking system
|
|
template <typename Func>
|
|
requires invocable_with_int<Func>
|
|
void eventify_strategy(int kernel_start_idx, int kernel_end_idx, int n_tasks,
|
|
Func&& loop_body) {
|
|
auto task_system = eventify::task_system{};
|
|
int tripcount = kernel_end_idx - kernel_start_idx + 1;
|
|
int chunk_size = tripcount / n_tasks;
|
|
int remainder = tripcount % n_tasks;
|
|
|
|
for (int tid = 0; tid < n_tasks; ++tid) {
|
|
auto task = [tid, tripcount, chunk_size, remainder, loop_body] {
|
|
int start_idx = tid * chunk_size;
|
|
int end_idx = start_idx + chunk_size - 1;
|
|
if (tripcount - end_idx == remainder) end_idx += remainder;
|
|
|
|
for (int i = start_idx; i < end_idx; ++i) {
|
|
loop_body(i);
|
|
}
|
|
};
|
|
task_system.submit(task);
|
|
}
|
|
}
|
|
#endif //ENABLE_EVENTIFY
|
|
|
|
// parallelization strategy selector
|
|
template <typename Func>
|
|
requires invocable_with_int<Func>
|
|
void execute_strategy(const std::string& strategy_name, int kernel_start_idx,
|
|
int kernel_end_idx, int num_threads_or_tasks,
|
|
Func&& loop_body) {
|
|
if (strategy_name == "omp") {
|
|
openmp_strategy(kernel_start_idx, kernel_end_idx, num_threads_or_tasks,
|
|
std::forward<Func>(loop_body));
|
|
#ifdef ENABLE_EVENTIFY
|
|
} else if (strategy_name == "eventify") {
|
|
eventify_strategy(kernel_start_idx, kernel_end_idx, num_threads_or_tasks,
|
|
std::forward<Func>(loop_body));
|
|
#endif
|
|
} else {
|
|
throw std::invalid_argument("Unknown strategy: " + strategy_name);
|
|
}
|
|
}
|
|
|
|
} // namespace strategy
|
|
#endif // STRATEGY_HPP
|