scalometer/include/strategy.hpp

88 lines
2.9 KiB
C++
Raw Permalink Normal View History

2024-12-13 00:33:08 +01:00
#ifndef STRATEGY_HPP
#define STRATEGY_HPP
#include <omp.h>
#include <stdexcept>
#include <string>
2024-12-16 14:51:38 +01:00
#ifdef ENABLE_EVENTIFY
#include <eventify/task_system.hxx>
#endif
2024-12-13 12:01:35 +01:00
// Parallelization strategies are defined here. Assumption for now: there is
// always an outer loop that can be parallelized. The strategies are templates
// instantiated when adding kernels to the kernel registry.
// Here, we only define the treatment of the outermost loop. The loop bodies
// are defined in kernels.cpp
2024-12-13 00:33:08 +01:00
namespace strategy {
2024-12-13 12:01:35 +01:00
// Concept for the loop bodies defined in kernels.cpp: Func must be callable
// with a single int argument, i.e. it represents one invocable iteration of a
// parallel loop. The result of the call (if any) is not constrained.
template <typename Func>
concept invocable_with_int = requires(Func&& body, int idx) {
  // Simple requirement: the expression body(idx) only has to be well-formed.
  std::forward<Func>(body)(idx);
};
// OpenMP strategy: the outermost loop is handed to an
// `omp parallel for` with static scheduling.
//
// Parameters:
//   kernel_start_idx  first loop index (inclusive)
//   kernel_end_idx    loop bound (exclusive, the loop runs while i < end)
//   n_threads         value passed to omp_set_num_threads() before the loop
//   loop_body         callable invoked as loop_body(i) for every index
//
// Note: omp_set_num_threads() is called unconditionally, so the thread-count
// setting is changed for the calling code as well, not only for this loop.
template <typename Func>
  requires invocable_with_int<Func>
void openmp_strategy(int kernel_start_idx, int kernel_end_idx, int n_threads,
                     Func&& loop_body) {
  omp_set_num_threads(n_threads);

  const int first = kernel_start_idx;
  const int last = kernel_end_idx;
#pragma omp parallel for schedule(static)
  for (int idx = first; idx < last; ++idx) {
    loop_body(idx);
  }
}
2024-12-16 14:51:38 +01:00
#ifdef ENABLE_EVENTIFY
2024-12-13 12:01:35 +01:00
// for eventify, we calculate indices for evenly divided chunks of the outermost
// loop, create independent tasks and submit them to the tasking system
template <typename Func>
2024-12-13 00:33:08 +01:00
requires invocable_with_int<Func>
2024-12-13 12:01:35 +01:00
void eventify_strategy(int kernel_start_idx, int kernel_end_idx, int n_tasks,
Func&& loop_body) {
auto task_system = eventify::task_system{};
int tripcount = kernel_end_idx - kernel_start_idx + 1;
int chunk_size = tripcount / n_tasks;
int remainder = tripcount % n_tasks;
for (int tid = 0; tid < n_tasks; ++tid) {
auto task = [tid, tripcount, chunk_size, remainder, loop_body] {
int start_idx = tid * chunk_size;
int end_idx = start_idx + chunk_size - 1;
if (tripcount - end_idx == remainder) end_idx += remainder;
for (int i = start_idx; i < end_idx; ++i) {
loop_body(i);
2024-12-13 00:33:08 +01:00
}
2024-12-13 12:01:35 +01:00
};
task_system.submit(task);
2024-12-13 00:33:08 +01:00
}
2024-12-13 12:01:35 +01:00
}
2024-12-16 14:51:38 +01:00
#endif //ENABLE_EVENTIFY
2024-12-13 12:01:35 +01:00
// Parallelization strategy selector: dispatches the outermost loop to the
// strategy identified by strategy_name.
//
// Parameters:
//   strategy_name         "omp", or "eventify" when built with ENABLE_EVENTIFY
//   kernel_start_idx      forwarded unchanged to the selected strategy
//   kernel_end_idx        forwarded unchanged to the selected strategy
//   num_threads_or_tasks  thread count (omp) or task count (eventify)
//   loop_body             callable forwarded to the selected strategy
//
// Throws std::invalid_argument for any other strategy name (including
// "eventify" when ENABLE_EVENTIFY is not defined).
template <typename Func>
  requires invocable_with_int<Func>
void execute_strategy(const std::string& strategy_name, int kernel_start_idx,
                      int kernel_end_idx, int num_threads_or_tasks,
                      Func&& loop_body) {
  if (strategy_name == "omp") {
    openmp_strategy(kernel_start_idx, kernel_end_idx, num_threads_or_tasks,
                    std::forward<Func>(loop_body));
    return;
  }

#ifdef ENABLE_EVENTIFY
  if (strategy_name == "eventify") {
    eventify_strategy(kernel_start_idx, kernel_end_idx, num_threads_or_tasks,
                      std::forward<Func>(loop_body));
    return;
  }
#endif

  // No known strategy matched.
  throw std::invalid_argument("Unknown strategy: " + strategy_name);
}
2024-12-13 12:01:35 +01:00
} // namespace strategy
#endif // STRATEGY_HPP