73 lines
2.8 KiB
C++
73 lines
2.8 KiB
C++
|
#ifndef STRATEGY_HPP
|
||
|
#define STRATEGY_HPP
|
||
|
|
||
|
#include <omp.h>
|
||
|
#include <stdexcept>
|
||
|
#include <string>
|
||
|
#include <eventify/task_system.hxx>
|
||
|
|
||
|
// Parallelization strategies are defined here. Assumption for now: there is always an outer loop that can be parallelized.
|
||
|
// The strategies are templates instantiated when adding kernels to the kernel registry.
|
||
|
// Here, we only define the treatment of the outermost loop. The loop bodies are defined in kernels.cpp
|
||
|
|
||
|
namespace strategy {
|
||
|
|
||
|
// define concept to ensure that the loop bodies defined in kernels.cpp represent one invocable iteration of a parallel loop
|
||
|
template <typename Func>
|
||
|
concept invocable_with_int = requires(Func&& f, int i) {
|
||
|
{ std::forward<Func>(f)(i) }; // Checks if calling f(i) is valid
|
||
|
};
|
||
|
|
||
|
|
||
|
// for OpenMP, we just use the for pragma for the outermost loop
|
||
|
template <typename Func>
|
||
|
requires invocable_with_int <Func>
|
||
|
void openmp_strategy(int kernel_start_idx, int kernel_end_idx, int n_threads, Func&& loop_body) {
|
||
|
omp_set_num_threads(static_cast<int>(n_threads));
|
||
|
|
||
|
#pragma omp parallel for schedule(static)
|
||
|
for (int i = kernel_start_idx; i < kernel_end_idx; ++i) {
|
||
|
loop_body(i);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// for eventify, we calculate indices for evenly divided chunks of the outermost loop,
|
||
|
// create independent tasks and submit them to the tasking system
|
||
|
template <typename Func>
|
||
|
requires invocable_with_int <Func>
|
||
|
void eventify_strategy(int kernel_start_idx, int kernel_end_idx, int n_tasks, Func&& loop_body) {
|
||
|
auto task_system = eventify::task_system {};
|
||
|
int tripcount = kernel_end_idx - kernel_start_idx + 1;
|
||
|
int chunk_size = tripcount / n_tasks;
|
||
|
int remainder = tripcount % n_tasks;
|
||
|
|
||
|
for (int tid = 0; tid < n_tasks; ++tid) {
|
||
|
auto task = [tid, tripcount, chunk_size, remainder, loop_body]{
|
||
|
int start_idx = tid * chunk_size;
|
||
|
int end_idx = start_idx + chunk_size - 1;
|
||
|
if (tripcount - end_idx == remainder) end_idx += remainder;
|
||
|
|
||
|
for (int i = start_idx; i < end_idx; ++i) {
|
||
|
loop_body(i);
|
||
|
}
|
||
|
};
|
||
|
task_system.submit(task);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// parallelization strategy selector
|
||
|
template <typename Func>
|
||
|
requires invocable_with_int<Func>
|
||
|
void execute_strategy(const std::string& strategy_name, int kernel_start_idx, int kernel_end_idx, int num_threads_or_tasks, Func&& loop_body) {
|
||
|
if (strategy_name == "omp") {
|
||
|
openmp_strategy(kernel_start_idx, kernel_end_idx, num_threads_or_tasks, std::forward<Func>(loop_body));
|
||
|
} else if (strategy_name == "eventify") {
|
||
|
eventify_strategy(kernel_start_idx, kernel_end_idx, num_threads_or_tasks, std::forward<Func>(loop_body));
|
||
|
} else {
|
||
|
throw std::invalid_argument("Unknown strategy: " + strategy_name);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
}
|
||
|
#endif //STRATEGY_HPP
|