scalometer/include/strategy.hpp

#ifndef STRATEGY_HPP
#define STRATEGY_HPP

#include <omp.h>
#include <stdexcept>
#include <string>
#include <utility>

#ifdef ENABLE_EVENTIFY
#include <eventify/task_system.hxx>
#endif

// Parallelization strategies are defined here. Assumption for now: there is
// always an outer loop that can be parallelized. The strategies are templates
// instantiated when adding kernels to the kernel registry.
// Here, we only define the treatment of the outermost loop. The loop bodies
// are defined in kernels.cpp

namespace strategy {

// Constraint for the loop bodies defined in kernels.cpp: a type satisfies
// this concept when a perfectly forwarded instance of it can be called with a
// single int argument, i.e. it represents one invocable iteration of a
// parallel loop.
template <typename Func>
concept invocable_with_int = requires(Func&& callable, int index) {
  std::forward<Func>(callable)(index);  // the call itself must be well-formed
};

// OpenMP strategy: requests n_threads threads via omp_set_num_threads and
// then runs the outermost loop over [kernel_start_idx, kernel_end_idx) as a
// statically scheduled OpenMP worksharing loop, calling loop_body once per
// index.
template <typename Func>
  requires invocable_with_int<Func>
void openmp_strategy(int kernel_start_idx, int kernel_end_idx, int n_threads,
                     Func&& loop_body) {
  // n_threads is already an int, so it is handed over unchanged.
  omp_set_num_threads(n_threads);

  // Parallel region containing a single worksharing loop.
#pragma omp parallel
  {
#pragma omp for schedule(static)
    for (int iter = kernel_start_idx; iter < kernel_end_idx; ++iter) {
      loop_body(iter);
    }
  }
}

#ifdef ENABLE_EVENTIFY
// Eventify strategy: splits the half-open iteration range
// [kernel_start_idx, kernel_end_idx) -- the same range the OpenMP strategy
// iterates -- into n_tasks contiguous chunks and submits one task per chunk
// to an eventify task system.
//
// The chunks are as even as possible: with r = tripcount % n_tasks, the first
// r tasks run one iteration more than the remaining ones. If n_tasks exceeds
// the trip count, the surplus tasks are still submitted but run zero
// iterations.
//
// Each task captures its own copy of loop_body.
//
// Throws std::invalid_argument if n_tasks is not positive.
//
// NOTE(review): nothing here waits for the tasks explicitly; this presumably
// relies on eventify::task_system completing submitted work when it goes out
// of scope at the end of this function -- confirm against the eventify docs.
template <typename Func>
  requires invocable_with_int<Func>
void eventify_strategy(int kernel_start_idx, int kernel_end_idx, int n_tasks,
                       Func&& loop_body) {
  if (n_tasks <= 0) {
    // Guards the division and modulo below against a zero/negative divisor.
    throw std::invalid_argument("eventify_strategy: n_tasks must be positive");
  }

  // An empty or inverted range yields zero iterations, exactly like the
  // `i < kernel_end_idx` loop of the OpenMP strategy.
  const int tripcount =
      kernel_end_idx > kernel_start_idx ? kernel_end_idx - kernel_start_idx : 0;
  const int chunk_size = tripcount / n_tasks;
  const int remainder = tripcount % n_tasks;

  auto task_system = eventify::task_system{};

  for (int tid = 0; tid < n_tasks; ++tid) {
    // Number of one-iteration extras handed to the tasks preceding this one.
    const int extras_before = tid < remainder ? tid : remainder;
    const int start_idx = kernel_start_idx + tid * chunk_size + extras_before;
    // Exclusive end; the first `remainder` tasks take one extra iteration.
    const int end_idx = start_idx + chunk_size + (tid < remainder ? 1 : 0);

    auto task = [start_idx, end_idx, loop_body] {
      for (int i = start_idx; i < end_idx; ++i) {
        loop_body(i);
      }
    };
    task_system.submit(task);
  }
}
#endif //ENABLE_EVENTIFY
       
// Strategy selector: dispatches the loop described by
// [kernel_start_idx, kernel_end_idx) and loop_body to the strategy named by
// strategy_name.
//   "omp"      -> openmp_strategy (num_threads_or_tasks is the thread count)
//   "eventify" -> eventify_strategy (num_threads_or_tasks is the task count);
//                 only available when built with ENABLE_EVENTIFY
// Any other name throws std::invalid_argument.
template <typename Func>
  requires invocable_with_int<Func>
void execute_strategy(const std::string& strategy_name, int kernel_start_idx,
                      int kernel_end_idx, int num_threads_or_tasks,
                      Func&& loop_body) {
  if (strategy_name == "omp") {
    openmp_strategy(kernel_start_idx, kernel_end_idx, num_threads_or_tasks,
                    std::forward<Func>(loop_body));
    return;
  }

#ifdef ENABLE_EVENTIFY
  if (strategy_name == "eventify") {
    eventify_strategy(kernel_start_idx, kernel_end_idx, num_threads_or_tasks,
                      std::forward<Func>(loop_body));
    return;
  }
#endif

  // No known strategy matched the requested name.
  throw std::invalid_argument("Unknown strategy: " + strategy_name);
}

}  // namespace strategy
#endif  // STRATEGY_HPP
Initial commit 2024-12-13 00:33:08 +01:00			`#ifndef STRATEGY_HPP`
			`#define STRATEGY_HPP`

			`#include <omp.h>`
			`#include <stdexcept>`
			`#include <string>`

Conditional compilation of eventify 2024-12-16 14:51:38 +01:00			`#ifdef ENABLE_EVENTIFY`
			`#include <eventify/task_system.hxx>`
			`#endif`

Applied Google formatting 2024-12-13 12:01:35 +01:00			`// Parallelization strategies are defined here. Assumption for now: there is`
			`// always an outer loop than can be parallelized. The strategies are templates`
			`// instanciated when adding kernels to the kernel registry.`
			`// Here, we only define the treatment of the outermost loop. The loop bodies`
			`// are defined in kernels.cpp`
Initial commit 2024-12-13 00:33:08 +01:00
			`namespace strategy {`

Applied Google formatting 2024-12-13 12:01:35 +01:00			`// define concept to ensure that the loop bodies defined in kernels.cpp`
			`// represent one invocable iteration of a parallel loop`
			`template <typename Func>`
			`concept invocable_with_int = requires(Func&& f, int i) {`
			`{ std::forward<Func>(f)(i) }; // Checks if calling f(i) is valid`
			`};`

			`// for OpenMP, we just use the for pragma for the outermost loop`
			`template <typename Func>`
			`requires invocable_with_int<Func>`
			`void openmp_strategy(int kernel_start_idx, int kernel_end_idx, int n_threads,`
			`Func&& loop_body) {`
			`omp_set_num_threads(static_cast<int>(n_threads));`

			`#pragma omp parallel for schedule(static)`
			`for (int i = kernel_start_idx; i < kernel_end_idx; ++i) {`
			`loop_body(i);`
Initial commit 2024-12-13 00:33:08 +01:00			`}`
Applied Google formatting 2024-12-13 12:01:35 +01:00			`}`

Conditional compilation of eventify 2024-12-16 14:51:38 +01:00			`#ifdef ENABLE_EVENTIFY`
Applied Google formatting 2024-12-13 12:01:35 +01:00			`// for eventify, we calculate indices for evenly divided chunks of the outermost`
			`// loop, create independent tasks and submit them to the tasking system`
			`template <typename Func>`
Initial commit 2024-12-13 00:33:08 +01:00			`requires invocable_with_int<Func>`
Applied Google formatting 2024-12-13 12:01:35 +01:00			`void eventify_strategy(int kernel_start_idx, int kernel_end_idx, int n_tasks,`
			`Func&& loop_body) {`
			`auto task_system = eventify::task_system{};`
			`int tripcount = kernel_end_idx - kernel_start_idx + 1;`
			`int chunk_size = tripcount / n_tasks;`
			`int remainder = tripcount % n_tasks;`

			`for (int tid = 0; tid < n_tasks; ++tid) {`
			`auto task = [tid, tripcount, chunk_size, remainder, loop_body] {`
			`int start_idx = tid * chunk_size;`
			`int end_idx = start_idx + chunk_size - 1;`
			`if (tripcount - end_idx == remainder) end_idx += remainder;`

			`for (int i = start_idx; i < end_idx; ++i) {`
			`loop_body(i);`
Initial commit 2024-12-13 00:33:08 +01:00			`}`
Applied Google formatting 2024-12-13 12:01:35 +01:00			`};`
			`task_system.submit(task);`
Initial commit 2024-12-13 00:33:08 +01:00			`}`
Applied Google formatting 2024-12-13 12:01:35 +01:00			`}`
Conditional compilation of eventify 2024-12-16 14:51:38 +01:00			`#endif //ENABLE_EVENTIFY`

Applied Google formatting 2024-12-13 12:01:35 +01:00			`// parallelization strategy selector`
			`template <typename Func>`
			`requires invocable_with_int<Func>`
			`void execute_strategy(const std::string& strategy_name, int kernel_start_idx,`
			`int kernel_end_idx, int num_threads_or_tasks,`
			`Func&& loop_body) {`
			`if (strategy_name == "omp") {`
			`openmp_strategy(kernel_start_idx, kernel_end_idx, num_threads_or_tasks,`
			`std::forward<Func>(loop_body));`
Conditional compilation of eventify 2024-12-16 14:51:38 +01:00			`#ifdef ENABLE_EVENTIFY`
Applied Google formatting 2024-12-13 12:01:35 +01:00			`} else if (strategy_name == "eventify") {`
			`eventify_strategy(kernel_start_idx, kernel_end_idx, num_threads_or_tasks,`
			`std::forward<Func>(loop_body));`
Conditional compilation of eventify 2024-12-16 14:51:38 +01:00			`#endif`
Applied Google formatting 2024-12-13 12:01:35 +01:00			`} else {`
			`throw std::invalid_argument("Unknown strategy: " + strategy_name);`
			`}`
Initial commit 2024-12-13 00:33:08 +01:00			`}`
Applied Google formatting 2024-12-13 12:01:35 +01:00
			`} // namespace strategy`
			`#endif // STRATEGY_HPP`