Applied Google formatting

2024-12-13 12:01:35 +01:00 · 2024-12-13 12:01:35 +01:00 · 3f9e253f23
parent 8dd3de290b
commit 3f9e253f23
5 changed files with 171 additions and 149 deletions
--- a/include/kernels.hpp
+++ b/include/kernels.hpp
@ -1,41 +1,41 @@
 #ifndef KERNELS_HPP
 #define KERNELS_HPP
 #include <string>
 #include <functional>
 #include <string>
 #include <unordered_map>
 class Kernel {
-public:
+ public:
  using StrategyFunction = std::function<void(int, int, int)>;
  using PreparationFunction = std::function<void()>;
-  Kernel(const std::string& name, StrategyFunction strategy_function, PreparationFunction preparation_function);
+  Kernel(const std::string& name, StrategyFunction strategy_function,
         PreparationFunction preparation_function);
  void prepare() const;
  void execute(int n_threads_or_tasks, int kernel_tripcount) const;
-private:
+ private:
  std::string name_;
  StrategyFunction strategy_function_;
  PreparationFunction preparation_function_;
 };
 class KernelRegistry {
-public:
+ public:
  using KernelBuilder = std::function<Kernel()>;
  void register_kernel(const std::string& name, KernelBuilder factory);
  Kernel load_kernel(const std::string& name) const;
  std::vector<std::string> list_available_kernels() const;
-private:
+ private:
-  // FIXME: no benchmarking of maps done. The registry is expected to stay small, though
+  // FIXME: no benchmarking of maps done. The registry is expected to stay
  // small, though
  std::unordered_map<std::string, KernelBuilder> registry_;
 };
 void initialize_registry(KernelRegistry* registry, std::string strategy_name);
-#endif // KERNELS_HPP
+#endif  // KERNELS_HPP
--- a/include/strategy.hpp
+++ b/include/strategy.hpp
@ -2,71 +2,80 @@
 #define STRATEGY_HPP
 #include <omp.h>
 #include <eventify/task_system.hxx>
 #include <stdexcept>
 #include <string>
 #include <eventify/task_system.hxx>
-// Parallelization strategies are defined here. Assumption for now: there is always an outer loop than can be parallelized.
+// Parallelization strategies are defined here. Assumption for now: there is
-// The strategies are templates instanciated when adding kernels to the kernel registry.
+// always an outer loop that can be parallelized. The strategies are templates
-// Here, we only define the treatment of the outermost loop. The loop bodies are defined in kernels.cpp
+// instantiated when adding kernels to the kernel registry.
 // Here, we only define the treatment of the outermost loop. The loop bodies 
 // are defined in kernels.cpp
 namespace strategy {
-  // define concept to ensure that the loop bodies defined in kernels.cpp represent one invocable iteration of a parallel loop 
+// define concept to ensure that the loop bodies defined in kernels.cpp
-  template <typename Func>
+// represent one invocable iteration of a parallel loop
-  concept invocable_with_int = requires(Func&& f, int i) {
+template <typename Func>
-      { std::forward<Func>(f)(i) };  // Checks if calling f(i) is valid
+concept invocable_with_int = requires(Func&& f, int i) {
-  };
+  { std::forward<Func>(f)(i) };  // Checks if calling f(i) is valid
 };
-  
+// for OpenMP, we just use the for pragma for the outermost loop
-  // for OpenMP, we just use the for pragma for the outermost loop
+template <typename Func>
  // OpenMP strategy: calls loop_body(i) once for every i in the half-open
  // range [kernel_start_idx, kernel_end_idx).
  //   kernel_start_idx - first iteration index (inclusive).
  //   kernel_end_idx   - end of the range (exclusive, see the loop condition).
  //   n_threads        - thread count handed to omp_set_num_threads() before
  //                      the parallel loop is entered.
  //   loop_body        - callable invoked as loop_body(i). With OpenMP enabled
  //                      the calls come from several threads, so iterations
  //                      presumably have to be independent of each other.
  template <typename Func>
  requires invocable_with_int <Func>
  void openmp_strategy(int kernel_start_idx, int kernel_end_idx, int n_threads, Func&& loop_body) {
      // n_threads is already an int, so the cast does not change the value.
      omp_set_num_threads(static_cast<int>(n_threads));
      // The pragma governs the for statement that directly follows it;
      // schedule(static) asks for a fixed up-front division of the iterations.
      #pragma omp parallel for schedule(static)
      for (int i = kernel_start_idx; i < kernel_end_idx; ++i) {
          loop_body(i);
      }
  }
  // for eventify, we split the half-open iteration range
  // [kernel_start_idx, kernel_end_idx) into n_tasks contiguous chunks,
  // create one independent task per chunk and submit them to the tasking system.
  //
  // The end index is exclusive, matching openmp_strategy (i < kernel_end_idx).
  // Chunk lengths differ by at most one: the first (tripcount % n_tasks) tasks
  // run one extra iteration, so every index is visited exactly once.
  //
  //   kernel_start_idx - first iteration index (inclusive).
  //   kernel_end_idx   - end of the range (exclusive).
  //   n_tasks          - number of tasks to submit; must be positive.
  //   loop_body        - callable invoked as loop_body(i); copied into each task.
  //
  // Throws std::invalid_argument if n_tasks is not positive (the chunk size
  // computation would otherwise divide by zero).
  template <typename Func>
  requires invocable_with_int <Func>
  void eventify_strategy(int kernel_start_idx, int kernel_end_idx, int n_tasks, Func&& loop_body) {
    if (n_tasks <= 0) {
      throw std::invalid_argument("eventify_strategy: n_tasks must be positive, got " +
                                  std::to_string(n_tasks));
    }
    // Empty or inverted range: nothing to run, same outcome as the OpenMP loop.
    if (kernel_end_idx <= kernel_start_idx) return;

    // NOTE(review): presumably the task_system destructor waits for all
    // submitted tasks before this function returns - confirm against eventify.
    auto task_system = eventify::task_system {};
    const int tripcount = kernel_end_idx - kernel_start_idx;
    const int chunk_size = tripcount / n_tasks;
    const int remainder = tripcount % n_tasks;
    for (int tid = 0; tid < n_tasks; ++tid) {
      // Tasks with a lower id than `remainder` each carry one extra iteration,
      // so this task's chunk is shifted by the extras handed out before it.
      const int extra_before = tid < remainder ? tid : remainder;
      const int start_idx = kernel_start_idx + tid * chunk_size + extra_before;
      const int end_idx = start_idx + chunk_size + (tid < remainder ? 1 : 0);
      auto task = [start_idx, end_idx, loop_body]{
        for (int i = start_idx; i < end_idx; ++i) {
          loop_body(i);
        }
      };
      task_system.submit(task);
    }
  }
  // parallelization strategy selector 
  template <typename Func>
  requires invocable_with_int<Func>
-  void execute_strategy(const std::string& strategy_name, int kernel_start_idx, int kernel_end_idx, int num_threads_or_tasks, Func&& loop_body) {
+void openmp_strategy(int kernel_start_idx, int kernel_end_idx, int n_threads,
-      if (strategy_name == "omp") {
+                     Func&& loop_body) {
-          openmp_strategy(kernel_start_idx, kernel_end_idx, num_threads_or_tasks, std::forward<Func>(loop_body));
+  omp_set_num_threads(static_cast<int>(n_threads));
      } else if (strategy_name == "eventify") {
          eventify_strategy(kernel_start_idx, kernel_end_idx, num_threads_or_tasks, std::forward<Func>(loop_body)); 
      } else {
          throw std::invalid_argument("Unknown strategy: " + strategy_name);
      }
  }
 #pragma omp parallel for schedule(static)
  for (int i = kernel_start_idx; i < kernel_end_idx; ++i) {
    loop_body(i);
  }
 }
-#endif //STRATEGY_HPP
+
 // for eventify, we split the half-open iteration range
 // [kernel_start_idx, kernel_end_idx) into n_tasks contiguous chunks, create
 // one independent task per chunk and submit them to the tasking system.
 //
 // The end index is exclusive, matching openmp_strategy (i < kernel_end_idx).
 // Chunk lengths differ by at most one: the first (tripcount % n_tasks) tasks
 // run one extra iteration, so every index is visited exactly once.
 //
 //   kernel_start_idx - first iteration index (inclusive).
 //   kernel_end_idx   - end of the range (exclusive).
 //   n_tasks          - number of tasks to submit; must be positive.
 //   loop_body        - callable invoked as loop_body(i); copied into each
 //                      task.
 //
 // Throws std::invalid_argument if n_tasks is not positive (the chunk size
 // computation would otherwise divide by zero).
 template <typename Func>
  requires invocable_with_int<Func>
 void eventify_strategy(int kernel_start_idx, int kernel_end_idx, int n_tasks,
                       Func&& loop_body) {
  if (n_tasks <= 0) {
    throw std::invalid_argument(
        "eventify_strategy: n_tasks must be positive, got " +
        std::to_string(n_tasks));
  }
  // Empty or inverted range: nothing to run, same outcome as the OpenMP loop.
  if (kernel_end_idx <= kernel_start_idx) return;

  // NOTE(review): presumably the task_system destructor waits for all
  // submitted tasks before this function returns - confirm against eventify.
  auto task_system = eventify::task_system{};
  const int tripcount = kernel_end_idx - kernel_start_idx;
  const int chunk_size = tripcount / n_tasks;
  const int remainder = tripcount % n_tasks;
  for (int tid = 0; tid < n_tasks; ++tid) {
    // Tasks with a lower id than `remainder` each carry one extra iteration,
    // so this task's chunk is shifted by the extras handed out before it.
    const int extra_before = tid < remainder ? tid : remainder;
    const int start_idx = kernel_start_idx + tid * chunk_size + extra_before;
    const int end_idx = start_idx + chunk_size + (tid < remainder ? 1 : 0);
    auto task = [start_idx, end_idx, loop_body] {
      for (int i = start_idx; i < end_idx; ++i) {
        loop_body(i);
      }
    };
    task_system.submit(task);
  }
 }
 // Dispatches the outer loop to the parallelization strategy selected by name.
 //   "omp"      -> openmp_strategy
 //   "eventify" -> eventify_strategy
 // The index range, the thread/task count and the loop body are passed through
 // unchanged. Any other name raises std::invalid_argument.
 template <typename Func>
  requires invocable_with_int<Func>
 void execute_strategy(const std::string& strategy_name, int kernel_start_idx,
                       int kernel_end_idx, int num_threads_or_tasks,
                       Func&& loop_body) {
  const bool wants_openmp = (strategy_name == "omp");
  if (wants_openmp) {
    openmp_strategy(kernel_start_idx, kernel_end_idx, num_threads_or_tasks,
                    std::forward<Func>(loop_body));
    return;
  }

  const bool wants_eventify = (strategy_name == "eventify");
  if (wants_eventify) {
    eventify_strategy(kernel_start_idx, kernel_end_idx, num_threads_or_tasks,
                      std::forward<Func>(loop_body));
    return;
  }

  // Neither known strategy matched.
  throw std::invalid_argument("Unknown strategy: " + strategy_name);
 }
 }  // namespace strategy
 #endif  // STRATEGY_HPP
--- a/include/utils.hpp
+++ b/include/utils.hpp
@ -5,12 +5,12 @@
 // Function to initialize a vector with random numbers
 void initialize_vector(std::vector<float>& v) {
-    std::random_device rd;
+  std::random_device rd;
-    std::mt19937 gen(rd());
+  std::mt19937 gen(rd());
-    std::uniform_real_distribution<float> dis(0.0f, 1.0f);
+  std::uniform_real_distribution<float> dis(0.0f, 1.0f);
-    for (auto& elem : v) {
+  for (auto& elem : v) {
-        elem = dis(gen);
+    elem = dis(gen);
-    }
+  }
 }
-#endif //UTILS_HPP
+#endif  // UTILS_HPP
--- a/src/kernels.cpp
+++ b/src/kernels.cpp
@ -1,21 +1,26 @@
 #include "kernels.hpp"
 #include <memory>
 #include <stdexcept>
-#include "kernels.hpp"
+
 #include "strategy.hpp"
 #include "utils.hpp"
-Kernel::Kernel(const std::string& name, Kernel::StrategyFunction strategy_function, Kernel::PreparationFunction preparation_function)
+Kernel::Kernel(const std::string& name,
-  : name_(name), strategy_function_(std::move(strategy_function)), preparation_function_(std::move(preparation_function)) {}
+               Kernel::StrategyFunction strategy_function,
               Kernel::PreparationFunction preparation_function)
    : name_(name),
      strategy_function_(std::move(strategy_function)),
      preparation_function_(std::move(preparation_function)) {}
-void Kernel::prepare() const {
+void Kernel::prepare() const { preparation_function_(); }
  preparation_function_();
 }
 // Invokes the stored strategy function with start index 0, kernel_tripcount
 // as the end index and num_threads_or_tasks as the thread/task count.
 void Kernel::execute(int num_threads_or_tasks, int kernel_tripcount) const {
  constexpr int first_iteration = 0;
  strategy_function_(first_iteration, kernel_tripcount, num_threads_or_tasks);
 }
-void KernelRegistry::register_kernel(const std::string& name, KernelBuilder factory) {
+void KernelRegistry::register_kernel(const std::string& name,
                                     KernelBuilder factory) {
  registry_.emplace(name, std::move(factory));
 }
@ -35,11 +40,11 @@ std::vector<std::string> KernelRegistry::list_available_kernels() const {
  return kernel_names;
 }
-// New kernels go here, each can have it's own set of arguments and initializations
+// New kernels go here; each can have its own set of arguments and
-// execute() contains the full kernel code minus an outer for loop (i=start, i<end, ++i), 
+// initializations. execute() contains the full kernel code minus an outer for
-// defined in the respective parallelization strategy
+// loop (i=start, i<end, ++i), defined in the respective parallelization
 // strategy
 void initialize_registry(KernelRegistry* registry, std::string strategy_name) {
  // STREAM TRIAD
  registry->register_kernel("stream_triad", [&]() {
    auto a = std::make_shared<std::vector<float>>();
@ -54,10 +59,11 @@ void initialize_registry(KernelRegistry* registry, std::string strategy_name) {
      initialize_vector(*c);
    };
-    auto execute = [=](int kernel_start_idx, int kernel_end_idx, int num_threads_or_tasks) {
+    auto execute = [=](int kernel_start_idx, int kernel_end_idx,
-      strategy::execute_strategy(strategy_name, kernel_start_idx, kernel_end_idx, num_threads_or_tasks, [&](int i) {
+                       int num_threads_or_tasks) {
-        (*a)[i] = (*b)[i] + 0.5f * (*c)[i];
+      strategy::execute_strategy(
-      });
+          strategy_name, kernel_start_idx, kernel_end_idx, num_threads_or_tasks,
          [&](int i) { (*a)[i] = (*b)[i] + 0.5f * (*c)[i]; });
    };
    return Kernel("stream_triad", execute, prepare);
@ -74,10 +80,11 @@ void initialize_registry(KernelRegistry* registry, std::string strategy_name) {
      initialize_vector(*b);
    };
-    auto execute = [=](int kernel_start_idx, int kernel_end_idx, int num_threads_or_tasks) {
+    auto execute = [=](int kernel_start_idx, int kernel_end_idx,
-      strategy::execute_strategy(strategy_name, kernel_start_idx, kernel_end_idx, num_threads_or_tasks, [&](int i) {
+                       int num_threads_or_tasks) {
-        (*a)[i] += 0.5f * (*b)[i];
+      strategy::execute_strategy(strategy_name, kernel_start_idx,
-      });
+                                 kernel_end_idx, num_threads_or_tasks,
                                 [&](int i) { (*a)[i] += 0.5f * (*b)[i]; });
    };
    return Kernel("daxpy", execute, prepare);
@ -107,15 +114,18 @@ void initialize_registry(KernelRegistry* registry, std::string strategy_name) {
      initialize_vector(*rz);
    };
-    auto execute = [=](int kernel_start_idx, int kernel_end_idx, int num_threads_or_tasks) {
+    auto execute = [=](int kernel_start_idx, int kernel_end_idx,
-      strategy::execute_strategy(strategy_name, kernel_start_idx, kernel_end_idx, num_threads_or_tasks, [&](int i) {
+                       int num_threads_or_tasks) {
-        (*potential)[i] = (*charge1)[i] * (*charge2)[i] / std::sqrt((*rx)[i] * (*rx)[i] + (*ry)[i] * (*ry)[i] + (*rz)[i] * (*rz)[i]);
+      strategy::execute_strategy(
-      });
+          strategy_name, kernel_start_idx, kernel_end_idx, num_threads_or_tasks,
          [&](int i) {
            (*potential)[i] =
                (*charge1)[i] * (*charge2)[i] /
                std::sqrt((*rx)[i] * (*rx)[i] + (*ry)[i] * (*ry)[i] +
                          (*rz)[i] * (*rz)[i]);
          });
    };
    return Kernel("coulomb", execute, prepare);
  });
 }
--- a/src/main.cpp
+++ b/src/main.cpp
@ -1,51 +1,54 @@
 #include <iostream>
 #include <chrono>
 #include <iostream>
 #include "kernels.hpp"
 int main(int argc, char** argv) {
  if (argc != 4) {
    std::cerr << "Usage: " << argv[0]
              << " <kernel_name> <strategy> <num_threads_or_tasks>\n";
    return 1;
  }
-    if (argc != 4) {
+  std::string kernel_name = argv[1];
-        std::cerr << "Usage: " << argv[0] << " <kernel_name> <strategy> <num_threads_or_tasks>\n";
+  std::string strategy_name = argv[2];
-        return 1;
+  int num_threads_or_tasks = std::stoul(argv[3]);
  // registry contains a map of kernels generated from kernel builders for the
  // selected parallelization strategy
  KernelRegistry registry;
  initialize_registry(&registry, strategy_name);
  try {
    // find kernel in unordered_map by its name. prepare() allocates and
    // initializes data structures needed for the selected kernel
    Kernel kernel = registry.load_kernel(kernel_name);
    kernel.prepare();
    // Time the kernel execution
    auto start_time = std::chrono::high_resolution_clock::now();
    // VECTOR_SIZE is a preprocessor variable to mimic the setup of STREAM
    kernel.execute(num_threads_or_tasks, VECTOR_SIZE);
    auto end_time = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double, std::milli> duration = end_time - start_time;
    std::cout << "Kernel: " << kernel_name << "\n";
    std::cout << "Parallelization strategy: " << strategy_name << "\n";
    std::cout << "Number of threads / tasks: " << num_threads_or_tasks << "\n";
    std::cout << "Kernel execution time [ms]: " << duration.count() << "\n";
  } catch (const std::invalid_argument& e) {
    // If kernel name is invalid, list available kernels
    std::cerr << e.what() << "\n";
    std::cerr << "Available kernels are:\n";
    for (const auto& kernel_name : registry.list_available_kernels()) {
      std::cerr << "  - " << kernel_name << "\n";
    }
-    std::string kernel_name = argv[1];
+    return 1;
-    std::string strategy_name = argv[2];
+  }
    int num_threads_or_tasks = std::stoul(argv[3]);
-    // registry contains a map of kernels generated from kernel builders for the selected parallelization strategy
+  return 0;
    KernelRegistry registry;
    initialize_registry(&registry, strategy_name);
    try{ 
      // find kernel in unordered_map by it's name. prepare() allocates and initializes data structures needed for the selected kernel
      Kernel kernel = registry.load_kernel(kernel_name);
      kernel.prepare();
      // Time the kernel execution
      auto start_time = std::chrono::high_resolution_clock::now();
      // VECTOR_SIZE is a preprocessor variable to mimic the setup of STREAM
      kernel.execute(num_threads_or_tasks, VECTOR_SIZE);
      auto end_time = std::chrono::high_resolution_clock::now();
      std::chrono::duration<double, std::milli> duration = end_time - start_time;
      std::cout << "Kernel: " << kernel_name << "\n";
      std::cout << "Parallelization strategy: " << strategy_name << "\n";
      std::cout << "Number of threads / tasks: " << num_threads_or_tasks << "\n";
      std::cout << "Kernel execution time [ms]: " << duration.count() << "\n";
    } catch (const std::invalid_argument& e) {
        // If kernel name is invalid, list available kernels
        std::cerr << e.what() << "\n";
        std::cerr << "Available kernels are:\n";
        // List available kernels from registry
        for (const auto& kernel_name : registry.list_available_kernels()) {
            std::cerr << "  - " << kernel_name << "\n";
        }
        return 1;
    }
    return 0;
 }