Applied Google formatting

This commit is contained in:
Patrick Lipka 2024-12-13 12:01:35 +01:00
parent 8dd3de290b
commit 3f9e253f23
5 changed files with 171 additions and 149 deletions

View File

@ -1,17 +1,17 @@
#ifndef KERNELS_HPP #ifndef KERNELS_HPP
#define KERNELS_HPP #define KERNELS_HPP
#include <string>
#include <functional> #include <functional>
#include <string>
#include <unordered_map> #include <unordered_map>
class Kernel { class Kernel {
public: public:
using StrategyFunction = std::function<void(int, int, int)>; using StrategyFunction = std::function<void(int, int, int)>;
using PreparationFunction = std::function<void()>; using PreparationFunction = std::function<void()>;
Kernel(const std::string& name, StrategyFunction strategy_function, PreparationFunction preparation_function); Kernel(const std::string& name, StrategyFunction strategy_function,
PreparationFunction preparation_function);
void prepare() const; void prepare() const;
void execute(int n_threads_or_tasks, int kernel_tripcount) const; void execute(int n_threads_or_tasks, int kernel_tripcount) const;
@ -31,11 +31,11 @@ public:
std::vector<std::string> list_available_kernels() const; std::vector<std::string> list_available_kernels() const;
private: private:
// FIXME: no benchmarking of maps done. The registry is expected to stay small, though // FIXME: no benchmarking of maps done. The registry is expected to stay
// small, though
std::unordered_map<std::string, KernelBuilder> registry_; std::unordered_map<std::string, KernelBuilder> registry_;
}; };
// Fills *registry with kernel builders. The definition hands strategy_name on
// to strategy::execute_strategy, which accepts "omp" or "eventify" and throws
// std::invalid_argument for any other value.
void initialize_registry(KernelRegistry* registry, std::string strategy_name); void initialize_registry(KernelRegistry* registry, std::string strategy_name);
#endif // KERNELS_HPP #endif // KERNELS_HPP

View File

@ -2,27 +2,31 @@
#define STRATEGY_HPP #define STRATEGY_HPP
#include <omp.h> #include <omp.h>
#include <eventify/task_system.hxx>
#include <stdexcept> #include <stdexcept>
#include <string> #include <string>
#include <eventify/task_system.hxx>
// Parallelization strategies are defined here. Assumption for now: there is always an outer loop that can be parallelized. // Parallelization strategies are defined here. Assumption for now: there is
// The strategies are templates instantiated when adding kernels to the kernel registry. // always an outer loop that can be parallelized. The strategies are templates
// Here, we only define the treatment of the outermost loop. The loop bodies are defined in kernels.cpp // instantiated when adding kernels to the kernel registry.
// Here, we only define the treatment of the outermost loop. The loop bodies
// are defined in kernels.cpp
namespace strategy { namespace strategy {
// define concept to ensure that the loop bodies defined in kernels.cpp represent one invocable iteration of a parallel loop // define concept to ensure that the loop bodies defined in kernels.cpp
// represent one invocable iteration of a parallel loop
// Satisfied when an object of type Func can be called with a single int
// argument; the requirement places no constraint on the call's result type.
template <typename Func> template <typename Func>
concept invocable_with_int = requires(Func&& f, int i) { concept invocable_with_int = requires(Func&& f, int i) {
{ std::forward<Func>(f)(i) }; // Checks if calling f(i) is valid { std::forward<Func>(f)(i) }; // Checks if calling f(i) is valid
}; };
// for OpenMP, we just use the for pragma for the outermost loop // for OpenMP, we just use the for pragma for the outermost loop
template <typename Func> template <typename Func>
requires invocable_with_int<Func> requires invocable_with_int<Func>
void openmp_strategy(int kernel_start_idx, int kernel_end_idx, int n_threads, Func&& loop_body) { void openmp_strategy(int kernel_start_idx, int kernel_end_idx, int n_threads,
Func&& loop_body) {
omp_set_num_threads(static_cast<int>(n_threads)); omp_set_num_threads(static_cast<int>(n_threads));
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
@ -31,11 +35,12 @@ namespace strategy {
} }
} }
// for eventify, we calculate indices for evenly divided chunks of the outermost loop, // for eventify, we calculate indices for evenly divided chunks of the outermost
// create independent tasks and submit them to the tasking system // loop, create independent tasks and submit them to the tasking system
template <typename Func> template <typename Func>
requires invocable_with_int<Func> requires invocable_with_int<Func>
void eventify_strategy(int kernel_start_idx, int kernel_end_idx, int n_tasks, Func&& loop_body) { void eventify_strategy(int kernel_start_idx, int kernel_end_idx, int n_tasks,
Func&& loop_body) {
auto task_system = eventify::task_system{}; auto task_system = eventify::task_system{};
int tripcount = kernel_end_idx - kernel_start_idx + 1; int tripcount = kernel_end_idx - kernel_start_idx + 1;
int chunk_size = tripcount / n_tasks; int chunk_size = tripcount / n_tasks;
@ -58,15 +63,19 @@ namespace strategy {
// parallelization strategy selector // parallelization strategy selector
// Dispatches one parallel run of loop_body to the strategy selected by name:
//   "omp"      -> openmp_strategy
//   "eventify" -> eventify_strategy
// The index bounds and the thread/task count are passed through unchanged and
// loop_body is forwarded with std::forward. Any other strategy_name throws
// std::invalid_argument with the message "Unknown strategy: <name>".
template <typename Func> template <typename Func>
requires invocable_with_int<Func> requires invocable_with_int<Func>
void execute_strategy(const std::string& strategy_name, int kernel_start_idx, int kernel_end_idx, int num_threads_or_tasks, Func&& loop_body) { void execute_strategy(const std::string& strategy_name, int kernel_start_idx,
int kernel_end_idx, int num_threads_or_tasks,
Func&& loop_body) {
if (strategy_name == "omp") { if (strategy_name == "omp") {
openmp_strategy(kernel_start_idx, kernel_end_idx, num_threads_or_tasks, std::forward<Func>(loop_body)); openmp_strategy(kernel_start_idx, kernel_end_idx, num_threads_or_tasks,
std::forward<Func>(loop_body));
} else if (strategy_name == "eventify") { } else if (strategy_name == "eventify") {
eventify_strategy(kernel_start_idx, kernel_end_idx, num_threads_or_tasks, std::forward<Func>(loop_body)); eventify_strategy(kernel_start_idx, kernel_end_idx, num_threads_or_tasks,
std::forward<Func>(loop_body));
} else { } else {
throw std::invalid_argument("Unknown strategy: " + strategy_name); throw std::invalid_argument("Unknown strategy: " + strategy_name);
} }
} }
} } // namespace strategy
#endif // STRATEGY_HPP #endif // STRATEGY_HPP

View File

@ -1,21 +1,26 @@
#include "kernels.hpp"
#include <memory> #include <memory>
#include <stdexcept> #include <stdexcept>
#include "kernels.hpp"
#include "strategy.hpp" #include "strategy.hpp"
#include "utils.hpp" #include "utils.hpp"
// Builds a kernel from its name and two callables. The name is copied into
// name_; both callables are taken by value and moved into their members.
Kernel::Kernel(const std::string& name, Kernel::StrategyFunction strategy_function, Kernel::PreparationFunction preparation_function) Kernel::Kernel(const std::string& name,
: name_(name), strategy_function_(std::move(strategy_function)), preparation_function_(std::move(preparation_function)) {} Kernel::StrategyFunction strategy_function,
Kernel::PreparationFunction preparation_function)
: name_(name),
strategy_function_(std::move(strategy_function)),
preparation_function_(std::move(preparation_function)) {}
// Runs the preparation callable that was supplied at construction.
void Kernel::prepare() const { void Kernel::prepare() const { preparation_function_(); }
preparation_function_();
}
// Runs the kernel through the stored strategy callable, passing 0 as the start
// index, kernel_tripcount as the end index and num_threads_or_tasks as the
// thread/task count.
// NOTE(review): eventify_strategy derives its trip count as end - start + 1,
// i.e. it appears to treat the end index as inclusive, which would yield
// kernel_tripcount + 1 iterations for this call -- confirm the intended bound
// convention against the strategy implementations.
void Kernel::execute(int num_threads_or_tasks, int kernel_tripcount) const { void Kernel::execute(int num_threads_or_tasks, int kernel_tripcount) const {
strategy_function_(0, kernel_tripcount, num_threads_or_tasks); strategy_function_(0, kernel_tripcount, num_threads_or_tasks);
} }
// Stores a kernel builder under the given name. registry_ is a
// std::unordered_map, so emplace leaves an already-registered name untouched
// instead of replacing its builder.
void KernelRegistry::register_kernel(const std::string& name, KernelBuilder factory) { void KernelRegistry::register_kernel(const std::string& name,
KernelBuilder factory) {
registry_.emplace(name, std::move(factory)); registry_.emplace(name, std::move(factory));
} }
@ -35,11 +40,11 @@ std::vector<std::string> KernelRegistry::list_available_kernels() const {
return kernel_names; return kernel_names;
} }
// New kernels go here, each can have its own set of arguments and initializations. // New kernels go here, each can have its own set of arguments and
// execute() contains the full kernel code minus an outer for loop (i=start, i<end, ++i), // initializations. execute() contains the full kernel code minus an outer for
// defined in the respective parallelization strategy // loop (i=start, i<end, ++i), defined in the respective parallelization
// strategy
void initialize_registry(KernelRegistry* registry, std::string strategy_name) { void initialize_registry(KernelRegistry* registry, std::string strategy_name) {
// STREAM TRIAD // STREAM TRIAD
registry->register_kernel("stream_triad", [&]() { registry->register_kernel("stream_triad", [&]() {
auto a = std::make_shared<std::vector<float>>(); auto a = std::make_shared<std::vector<float>>();
@ -54,10 +59,11 @@ void initialize_registry(KernelRegistry* registry, std::string strategy_name) {
initialize_vector(*c); initialize_vector(*c);
}; };
auto execute = [=](int kernel_start_idx, int kernel_end_idx, int num_threads_or_tasks) { auto execute = [=](int kernel_start_idx, int kernel_end_idx,
strategy::execute_strategy(strategy_name, kernel_start_idx, kernel_end_idx, num_threads_or_tasks, [&](int i) { int num_threads_or_tasks) {
(*a)[i] = (*b)[i] + 0.5f * (*c)[i]; strategy::execute_strategy(
}); strategy_name, kernel_start_idx, kernel_end_idx, num_threads_or_tasks,
[&](int i) { (*a)[i] = (*b)[i] + 0.5f * (*c)[i]; });
}; };
return Kernel("stream_triad", execute, prepare); return Kernel("stream_triad", execute, prepare);
@ -74,10 +80,11 @@ void initialize_registry(KernelRegistry* registry, std::string strategy_name) {
initialize_vector(*b); initialize_vector(*b);
}; };
auto execute = [=](int kernel_start_idx, int kernel_end_idx, int num_threads_or_tasks) { auto execute = [=](int kernel_start_idx, int kernel_end_idx,
strategy::execute_strategy(strategy_name, kernel_start_idx, kernel_end_idx, num_threads_or_tasks, [&](int i) { int num_threads_or_tasks) {
(*a)[i] += 0.5f * (*b)[i]; strategy::execute_strategy(strategy_name, kernel_start_idx,
}); kernel_end_idx, num_threads_or_tasks,
[&](int i) { (*a)[i] += 0.5f * (*b)[i]; });
}; };
return Kernel("daxpy", execute, prepare); return Kernel("daxpy", execute, prepare);
@ -107,15 +114,18 @@ void initialize_registry(KernelRegistry* registry, std::string strategy_name) {
initialize_vector(*rz); initialize_vector(*rz);
}; };
auto execute = [=](int kernel_start_idx, int kernel_end_idx, int num_threads_or_tasks) { auto execute = [=](int kernel_start_idx, int kernel_end_idx,
strategy::execute_strategy(strategy_name, kernel_start_idx, kernel_end_idx, num_threads_or_tasks, [&](int i) { int num_threads_or_tasks) {
(*potential)[i] = (*charge1)[i] * (*charge2)[i] / std::sqrt((*rx)[i] * (*rx)[i] + (*ry)[i] * (*ry)[i] + (*rz)[i] * (*rz)[i]); strategy::execute_strategy(
strategy_name, kernel_start_idx, kernel_end_idx, num_threads_or_tasks,
[&](int i) {
(*potential)[i] =
(*charge1)[i] * (*charge2)[i] /
std::sqrt((*rx)[i] * (*rx)[i] + (*ry)[i] * (*ry)[i] +
(*rz)[i] * (*rz)[i]);
}); });
}; };
return Kernel("coulomb", execute, prepare); return Kernel("coulomb", execute, prepare);
}); });
} }

View File

@ -1,11 +1,12 @@
#include <iostream>
#include <chrono> #include <chrono>
#include <iostream>
#include "kernels.hpp" #include "kernels.hpp"
int main(int argc, char** argv) { int main(int argc, char** argv) {
if (argc != 4) { if (argc != 4) {
std::cerr << "Usage: " << argv[0] << " <kernel_name> <strategy> <num_threads_or_tasks>\n"; std::cerr << "Usage: " << argv[0]
<< " <kernel_name> <strategy> <num_threads_or_tasks>\n";
return 1; return 1;
} }
@ -13,12 +14,14 @@ int main(int argc, char** argv) {
std::string strategy_name = argv[2]; std::string strategy_name = argv[2];
int num_threads_or_tasks = std::stoul(argv[3]); int num_threads_or_tasks = std::stoul(argv[3]);
// registry contains a map of kernels generated from kernel builders for the selected parallelization strategy // registry contains a map of kernels generated from kernel builders for the
// selected parallelization strategy
KernelRegistry registry; KernelRegistry registry;
initialize_registry(&registry, strategy_name); initialize_registry(&registry, strategy_name);
try { try {
// find kernel in unordered_map by its name. prepare() allocates and initializes data structures needed for the selected kernel // find kernel in unordered_map by its name. prepare() allocates and
// initializes data structures needed for the selected kernel
Kernel kernel = registry.load_kernel(kernel_name); Kernel kernel = registry.load_kernel(kernel_name);
kernel.prepare(); kernel.prepare();
@ -35,17 +38,17 @@ int main(int argc, char** argv) {
std::cout << "Parallelization strategy: " << strategy_name << "\n"; std::cout << "Parallelization strategy: " << strategy_name << "\n";
std::cout << "Number of threads / tasks: " << num_threads_or_tasks << "\n"; std::cout << "Number of threads / tasks: " << num_threads_or_tasks << "\n";
std::cout << "Kernel execution time [ms]: " << duration.count() << "\n"; std::cout << "Kernel execution time [ms]: " << duration.count() << "\n";
} catch (const std::invalid_argument& e) { } catch (const std::invalid_argument& e) {
// If kernel name is invalid, list available kernels // If kernel name is invalid, list available kernels
std::cerr << e.what() << "\n"; std::cerr << e.what() << "\n";
std::cerr << "Available kernels are:\n"; std::cerr << "Available kernels are:\n";
// List available kernels from registry
for (const auto& kernel_name : registry.list_available_kernels()) { for (const auto& kernel_name : registry.list_available_kernels()) {
std::cerr << " - " << kernel_name << "\n"; std::cerr << " - " << kernel_name << "\n";
} }
return 1; return 1;
} }
return 0; return 0;
} }