Applied Google formatting

2024-12-13 12:01:35 +01:00 · 2024-12-13 12:01:35 +01:00 · 3f9e253f23
parent 8dd3de290b
commit 3f9e253f23
5 changed files with 171 additions and 149 deletions
--- a/include/kernels.hpp
+++ b/include/kernels.hpp
@ -1,41 +1,41 @@
 #ifndef KERNELS_HPP
 #define KERNELS_HPP

-#include <string>
 #include <functional>
+#include <string>
 #include <unordered_map>

-
 class Kernel {
-public:
+ public:
  using StrategyFunction = std::function<void(int, int, int)>;
  using PreparationFunction = std::function<void()>;

-  Kernel(const std::string& name, StrategyFunction strategy_function, PreparationFunction preparation_function);
+  Kernel(const std::string& name, StrategyFunction strategy_function,
+         PreparationFunction preparation_function);

  void prepare() const;
  void execute(int n_threads_or_tasks, int kernel_tripcount) const;

-private:
+ private:
  std::string name_;
  StrategyFunction strategy_function_;
  PreparationFunction preparation_function_;
 };

 class KernelRegistry {
-public:
+ public:
  using KernelBuilder = std::function<Kernel()>;

  void register_kernel(const std::string& name, KernelBuilder factory);
  Kernel load_kernel(const std::string& name) const;
  std::vector<std::string> list_available_kernels() const;

-private:
-  // FIXME: no benchmarking of maps done. The registry is expected to stay small, though
+ private:
+  // FIXME: no benchmarking of maps done. The registry is expected to stay
+  // small, though
  std::unordered_map<std::string, KernelBuilder> registry_;
 };

 void initialize_registry(KernelRegistry* registry, std::string strategy_name);

-#endif // KERNELS_HPP
-
+#endif  // KERNELS_HPP
--- a/include/strategy.hpp
+++ b/include/strategy.hpp
@ -2,71 +2,80 @@
 #define STRATEGY_HPP

 #include <omp.h>
+
+#include <eventify/task_system.hxx>
 #include <stdexcept>
 #include <string>
-#include <eventify/task_system.hxx>

-// Parallelization strategies are defined here. Assumption for now: there is always an outer loop than can be parallelized.
-// The strategies are templates instanciated when adding kernels to the kernel registry.
-// Here, we only define the treatment of the outermost loop. The loop bodies are defined in kernels.cpp
+// Parallelization strategies are defined here. Assumption for now: there is
+// always an outer loop that can be parallelized. The strategies are templates
+// instantiated when adding kernels to the kernel registry.
+// Here, we only define the treatment of the outermost loop. The loop bodies
+// are defined in kernels.cpp

 namespace strategy {

-  // define concept to ensure that the loop bodies defined in kernels.cpp represent one invocable iteration of a parallel loop 
-  template <typename Func>
-  concept invocable_with_int = requires(Func&& f, int i) {
-      { std::forward<Func>(f)(i) };  // Checks if calling f(i) is valid
-  };
-  
-  
-  // for OpenMP, we just use the for pragma for the outermost loop
-  template <typename Func>
-  requires invocable_with_int <Func>
-  void openmp_strategy(int kernel_start_idx, int kernel_end_idx, int n_threads, Func&& loop_body) {
-      omp_set_num_threads(static_cast<int>(n_threads));
-  
-      #pragma omp parallel for schedule(static)
-      for (int i = kernel_start_idx; i < kernel_end_idx; ++i) {
-          loop_body(i);
-      }
-  }
-  
-  // for eventify, we calculate indices for evenly divided chunks of the outermost loop,
-  // create independent tasks and submit them to the tasking system
-  template <typename Func>
-  requires invocable_with_int <Func>
-  void eventify_strategy(int kernel_start_idx, int kernel_end_idx, int n_tasks, Func&& loop_body) {
-    auto task_system = eventify::task_system {};
-    int tripcount = kernel_end_idx - kernel_start_idx + 1;
-    int chunk_size = tripcount / n_tasks;
-    int remainder = tripcount % n_tasks;
-  
-    for (int tid = 0; tid < n_tasks; ++tid) {
-      auto task = [tid, tripcount, chunk_size, remainder, loop_body]{
-        int start_idx = tid * chunk_size;
-        int end_idx = start_idx + chunk_size - 1;
-        if (tripcount - end_idx == remainder) end_idx += remainder;
-        
-        for (int i = start_idx; i < end_idx; ++i) {
-          loop_body(i);
-        }
-      };
-      task_system.submit(task);
-    }
-  }
-  
-  // parallelization strategy selector 
-  template <typename Func>
-  requires invocable_with_int<Func>
-  void execute_strategy(const std::string& strategy_name, int kernel_start_idx, int kernel_end_idx, int num_threads_or_tasks, Func&& loop_body) {
-      if (strategy_name == "omp") {
-          openmp_strategy(kernel_start_idx, kernel_end_idx, num_threads_or_tasks, std::forward<Func>(loop_body));
-      } else if (strategy_name == "eventify") {
-          eventify_strategy(kernel_start_idx, kernel_end_idx, num_threads_or_tasks, std::forward<Func>(loop_body)); 
-      } else {
-          throw std::invalid_argument("Unknown strategy: " + strategy_name);
-      }
-  }
+// define concept to ensure that the loop bodies defined in kernels.cpp
+// represent one invocable iteration of a parallel loop
+template <typename Func>
+concept invocable_with_int = requires(Func&& f, int i) {
+  { std::forward<Func>(f)(i) };  // Checks if calling f(i) is valid
+};

+// for OpenMP, we just use the for pragma for the outermost loop
+template <typename Func>
+  requires invocable_with_int<Func>
+void openmp_strategy(int kernel_start_idx, int kernel_end_idx, int n_threads,
+                     Func&& loop_body) {
+  omp_set_num_threads(static_cast<int>(n_threads));
+
+#pragma omp parallel for schedule(static)
+  for (int i = kernel_start_idx; i < kernel_end_idx; ++i) {
+    loop_body(i);
+  }
 }
-#endif //STRATEGY_HPP
+
+// for eventify, we split the half-open index range
+// [kernel_start_idx, kernel_end_idx) into n_tasks contiguous chunks, create
+// one independent task per chunk and submit them to the tasking system.
+// The first (tripcount % n_tasks) chunks run one extra iteration, so every
+// index of the range is visited exactly once.
+//
+// kernel_start_idx: first index to process (inclusive)
+// kernel_end_idx:   one past the last index to process (exclusive)
+// n_tasks:          number of tasks to create; must be positive
+// loop_body:        callable invoked as loop_body(i) for each index i
+//
+// Throws std::invalid_argument if n_tasks is not positive.
+template <typename Func>
+  requires invocable_with_int<Func>
+void eventify_strategy(int kernel_start_idx, int kernel_end_idx, int n_tasks,
+                       Func&& loop_body) {
+  // Guard the divisions below against a zero or negative task count.
+  if (n_tasks <= 0) {
+    throw std::invalid_argument("eventify_strategy: n_tasks must be positive");
+  }
+
+  // The end index is exclusive (openmp_strategy loops with
+  // i < kernel_end_idx), so the trip count must not include a "+ 1".
+  const int tripcount = kernel_end_idx - kernel_start_idx;
+  if (tripcount <= 0) return;  // empty range: nothing to do
+
+  const int chunk_size = tripcount / n_tasks;
+  const int remainder = tripcount % n_tasks;
+
+  // NOTE(review): assumes the task_system destructor waits for all submitted
+  // tasks before returning -- confirm against the eventify documentation.
+  auto task_system = eventify::task_system{};
+
+  for (int tid = 0; tid < n_tasks; ++tid) {
+    // Chunks with tid < remainder are one iteration longer; shift the start
+    // of this chunk by the number of such longer chunks that precede it.
+    const int longer_chunks_before = tid < remainder ? tid : remainder;
+    const int start_idx =
+        kernel_start_idx + tid * chunk_size + longer_chunks_before;
+    const int end_idx = start_idx + chunk_size + (tid < remainder ? 1 : 0);
+
+    // loop_body is captured by value, so each task owns its own copy of the
+    // callable.
+    auto task = [start_idx, end_idx, loop_body] {
+      for (int i = start_idx; i < end_idx; ++i) {
+        loop_body(i);
+      }
+    };
+    task_system.submit(task);
+  }
+}
+
+// parallelization strategy selector
+template <typename Func>
+  requires invocable_with_int<Func>
+void execute_strategy(const std::string& strategy_name, int kernel_start_idx,
+                      int kernel_end_idx, int num_threads_or_tasks,
+                      Func&& loop_body) {
+  if (strategy_name == "omp") {
+    openmp_strategy(kernel_start_idx, kernel_end_idx, num_threads_or_tasks,
+                    std::forward<Func>(loop_body));
+  } else if (strategy_name == "eventify") {
+    eventify_strategy(kernel_start_idx, kernel_end_idx, num_threads_or_tasks,
+                      std::forward<Func>(loop_body));
+  } else {
+    throw std::invalid_argument("Unknown strategy: " + strategy_name);
+  }
+}
+
+}  // namespace strategy
+#endif  // STRATEGY_HPP
--- a/include/utils.hpp
+++ b/include/utils.hpp
@ -5,12 +5,12 @@

 // Function to initialize a vector with random numbers
 void initialize_vector(std::vector<float>& v) {
-    std::random_device rd;
-    std::mt19937 gen(rd());
-    std::uniform_real_distribution<float> dis(0.0f, 1.0f);
-    for (auto& elem : v) {
-        elem = dis(gen);
-    }
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::uniform_real_distribution<float> dis(0.0f, 1.0f);
+  for (auto& elem : v) {
+    elem = dis(gen);
+  }
 }

-#endif //UTILS_HPP
+#endif  // UTILS_HPP
--- a/src/kernels.cpp
+++ b/src/kernels.cpp
@ -1,21 +1,26 @@
+#include "kernels.hpp"
+
 #include <memory>
 #include <stdexcept>
-#include "kernels.hpp"
+
 #include "strategy.hpp"
 #include "utils.hpp"

-Kernel::Kernel(const std::string& name, Kernel::StrategyFunction strategy_function, Kernel::PreparationFunction preparation_function)
-  : name_(name), strategy_function_(std::move(strategy_function)), preparation_function_(std::move(preparation_function)) {}
+Kernel::Kernel(const std::string& name,
+               Kernel::StrategyFunction strategy_function,
+               Kernel::PreparationFunction preparation_function)
+    : name_(name),
+      strategy_function_(std::move(strategy_function)),
+      preparation_function_(std::move(preparation_function)) {}

-void Kernel::prepare() const {
-  preparation_function_();
-}
+void Kernel::prepare() const { preparation_function_(); }

 void Kernel::execute(int num_threads_or_tasks, int kernel_tripcount) const {
  strategy_function_(0, kernel_tripcount, num_threads_or_tasks);
 }

-void KernelRegistry::register_kernel(const std::string& name, KernelBuilder factory) {
+void KernelRegistry::register_kernel(const std::string& name,
+                                     KernelBuilder factory) {
  registry_.emplace(name, std::move(factory));
 }

@ -35,17 +40,17 @@ std::vector<std::string> KernelRegistry::list_available_kernels() const {
  return kernel_names;
 }

-// New kernels go here, each can have it's own set of arguments and initializations
-// execute() contains the full kernel code minus an outer for loop (i=start, i<end, ++i), 
-// defined in the respective parallelization strategy
+// New kernels go here; each can have its own set of arguments and
+// initializations. execute() contains the full kernel code minus an outer for
+// loop (i=start, i<end, ++i), defined in the respective parallelization
+// strategy
 void initialize_registry(KernelRegistry* registry, std::string strategy_name) {
-
  // STREAM TRIAD
  registry->register_kernel("stream_triad", [&]() {
    auto a = std::make_shared<std::vector<float>>();
    auto b = std::make_shared<std::vector<float>>();
    auto c = std::make_shared<std::vector<float>>();
-    
+
    auto prepare = [=]() {
      a->resize(VECTOR_SIZE);
      b->resize(VECTOR_SIZE);
@ -54,10 +59,11 @@ void initialize_registry(KernelRegistry* registry, std::string strategy_name) {
      initialize_vector(*c);
    };

-    auto execute = [=](int kernel_start_idx, int kernel_end_idx, int num_threads_or_tasks) {
-      strategy::execute_strategy(strategy_name, kernel_start_idx, kernel_end_idx, num_threads_or_tasks, [&](int i) {
-        (*a)[i] = (*b)[i] + 0.5f * (*c)[i];
-      });
+    auto execute = [=](int kernel_start_idx, int kernel_end_idx,
+                       int num_threads_or_tasks) {
+      strategy::execute_strategy(
+          strategy_name, kernel_start_idx, kernel_end_idx, num_threads_or_tasks,
+          [&](int i) { (*a)[i] = (*b)[i] + 0.5f * (*c)[i]; });
    };

    return Kernel("stream_triad", execute, prepare);
@ -74,10 +80,11 @@ void initialize_registry(KernelRegistry* registry, std::string strategy_name) {
      initialize_vector(*b);
    };

-    auto execute = [=](int kernel_start_idx, int kernel_end_idx, int num_threads_or_tasks) {
-      strategy::execute_strategy(strategy_name, kernel_start_idx, kernel_end_idx, num_threads_or_tasks, [&](int i) {
-        (*a)[i] += 0.5f * (*b)[i];
-      });
+    auto execute = [=](int kernel_start_idx, int kernel_end_idx,
+                       int num_threads_or_tasks) {
+      strategy::execute_strategy(strategy_name, kernel_start_idx,
+                                 kernel_end_idx, num_threads_or_tasks,
+                                 [&](int i) { (*a)[i] += 0.5f * (*b)[i]; });
    };

    return Kernel("daxpy", execute, prepare);
@ -106,16 +113,19 @@ void initialize_registry(KernelRegistry* registry, std::string strategy_name) {
      initialize_vector(*ry);
      initialize_vector(*rz);
    };
-                                                                                                                     
-    auto execute = [=](int kernel_start_idx, int kernel_end_idx, int num_threads_or_tasks) {
-      strategy::execute_strategy(strategy_name, kernel_start_idx, kernel_end_idx, num_threads_or_tasks, [&](int i) {
-        (*potential)[i] = (*charge1)[i] * (*charge2)[i] / std::sqrt((*rx)[i] * (*rx)[i] + (*ry)[i] * (*ry)[i] + (*rz)[i] * (*rz)[i]);
-      });
+
+    auto execute = [=](int kernel_start_idx, int kernel_end_idx,
+                       int num_threads_or_tasks) {
+      strategy::execute_strategy(
+          strategy_name, kernel_start_idx, kernel_end_idx, num_threads_or_tasks,
+          [&](int i) {
+            (*potential)[i] =
+                (*charge1)[i] * (*charge2)[i] /
+                std::sqrt((*rx)[i] * (*rx)[i] + (*ry)[i] * (*ry)[i] +
+                          (*rz)[i] * (*rz)[i]);
+          });
    };
-                                                                                                                     
+
    return Kernel("coulomb", execute, prepare);
  });
-
-
 }
-
--- a/src/main.cpp
+++ b/src/main.cpp
@ -1,51 +1,54 @@
-#include <iostream>
 #include <chrono>
+#include <iostream>
+
 #include "kernels.hpp"

 int main(int argc, char** argv) {
+  if (argc != 4) {
+    std::cerr << "Usage: " << argv[0]
+              << " <kernel_name> <strategy> <num_threads_or_tasks>\n";
+    return 1;
+  }

-    if (argc != 4) {
-        std::cerr << "Usage: " << argv[0] << " <kernel_name> <strategy> <num_threads_or_tasks>\n";
-        return 1;
+  std::string kernel_name = argv[1];
+  std::string strategy_name = argv[2];
+  int num_threads_or_tasks = std::stoul(argv[3]);
+
+  // registry contains a map of kernels generated from kernel builders for the
+  // selected parallelization strategy
+  KernelRegistry registry;
+  initialize_registry(&registry, strategy_name);
+
+  try {
+    // find kernel in unordered_map by its name. prepare() allocates and
+    // initializes data structures needed for the selected kernel
+    Kernel kernel = registry.load_kernel(kernel_name);
+    kernel.prepare();
+
+    // Time the kernel execution
+    auto start_time = std::chrono::high_resolution_clock::now();
+
+    // VECTOR_SIZE is a preprocessor variable to mimic the setup of STREAM
+    kernel.execute(num_threads_or_tasks, VECTOR_SIZE);
+
+    auto end_time = std::chrono::high_resolution_clock::now();
+    std::chrono::duration<double, std::milli> duration = end_time - start_time;
+
+    std::cout << "Kernel: " << kernel_name << "\n";
+    std::cout << "Parallelization strategy: " << strategy_name << "\n";
+    std::cout << "Number of threads / tasks: " << num_threads_or_tasks << "\n";
+    std::cout << "Kernel execution time [ms]: " << duration.count() << "\n";
+  
+  } catch (const std::invalid_argument& e) {
+    // If kernel name is invalid, list available kernels
+    std::cerr << e.what() << "\n";
+    std::cerr << "Available kernels are:\n";
+    for (const auto& kernel_name : registry.list_available_kernels()) {
+      std::cerr << "  - " << kernel_name << "\n";
    }
-
-    std::string kernel_name = argv[1];
-    std::string strategy_name = argv[2];
-    int num_threads_or_tasks = std::stoul(argv[3]);
    
-    // registry contains a map of kernels generated from kernel builders for the selected parallelization strategy
-    KernelRegistry registry;
-    initialize_registry(&registry, strategy_name);
-    
-    try{ 
-      // find kernel in unordered_map by it's name. prepare() allocates and initializes data structures needed for the selected kernel
-      Kernel kernel = registry.load_kernel(kernel_name);
-      kernel.prepare();
-
-      // Time the kernel execution
-      auto start_time = std::chrono::high_resolution_clock::now();
-      
-      // VECTOR_SIZE is a preprocessor variable to mimic the setup of STREAM
-      kernel.execute(num_threads_or_tasks, VECTOR_SIZE);
-      
-      auto end_time = std::chrono::high_resolution_clock::now();
-      std::chrono::duration<double, std::milli> duration = end_time - start_time;
-
-      std::cout << "Kernel: " << kernel_name << "\n";
-      std::cout << "Parallelization strategy: " << strategy_name << "\n";
-      std::cout << "Number of threads / tasks: " << num_threads_or_tasks << "\n";
-      std::cout << "Kernel execution time [ms]: " << duration.count() << "\n";
-    } catch (const std::invalid_argument& e) {
-        // If kernel name is invalid, list available kernels
-        std::cerr << e.what() << "\n";
-        std::cerr << "Available kernels are:\n";
-        
-        // List available kernels from registry
-        for (const auto& kernel_name : registry.list_available_kernels()) {
-            std::cerr << "  - " << kernel_name << "\n";
-        }
-
-        return 1;
-    }
-    return 0;
+    return 1;
+  }
+  
+  return 0;
 }