Documentation:
Book:
%%writefile example.cpp
#include <future>
#include <iostream>
// Returns the sum of the two integer operands.
int add(int a, int b)
{
    const int sum = a + b;
    return sum;
}
// Entry point: evaluates add(5, 5) asynchronously and prints the result.
int main()
{
    const int a = 5;
    const int b = 5;
    // Request std::launch::async explicitly. The default policy
    // (async | deferred) allows the implementation to postpone the call and
    // run it on the calling thread inside get(), i.e. not asynchronously.
    std::future<int> result = std::async(std::launch::async, add, a, b);
    // get() blocks until the task has finished and yields its return value.
    std::cout << result.get() << std::endl;
    return EXIT_SUCCESS;
}
We need to add the `-pthread` flag to the compiler for the asynchronous execution.
%%bash
g++ example.cpp -pthread -o example
%%bash
./example
#include<run_hpx.cpp>
#include <hpx/future.hpp>
#include <iostream>
// Adds two integers and returns the result.
int add(int a, int b)
{
    int total = a;
    total += b;
    return total;
}
// Execute the example through run_hpx (presumably provided by the included
// run_hpx.cpp, which is expected to start the HPX runtime -- TODO confirm).
run_hpx([]() {
    // hpx::async launches add(5, 5) and hands back a future for its result.
    // The type is spelled hpx::future, the same spelling the Taylor example
    // in this file uses, instead of the hpx::lcos:: spelling.
    hpx::future<int> result = hpx::async(add, 5, 5);
    // get() waits for the task and yields the value returned by add.
    std::cout << result.get();
});
For this example the Taylor series for the $\sin(x)$ function is computed. The Taylor series is given by,
$$ \sin(x) \approx \sum\limits_{n=0}^{N} (-1)^{n} \frac{x^{2n+1}}{(2n+1)!}. $$
For the concurrent computation, the interval $[0, N]$ is split into two
partitions, $[0, N/2]$ and $[(N/2)+1, N]$, and these are computed
asynchronously using `hpx::async`. Note that each asynchronous function call
returns an `hpx::future`, which is needed to synchronize the collection
of the partial results.
#include <cmath>
// Partial sum of the Taylor series of sin(x) about 0:
//
//     sin(x) ~= sum_i (-1)^i * x^(2i+1) / (2i+1)!
//
// Adds up the terms whose index i lies in the half-open range [begin, end).
//
//   begin  index of the first term to include
//   end    one past the index of the last term to include
//   n      total number of terms of the full series; kept in the signature
//          for the existing callers, not needed to evaluate a single term
//   x      point at which the series is evaluated
//
// Returns the sum of the selected terms (0 when the range is empty).
double taylor(size_t begin, size_t end, size_t n, double x)
{
    (void) n;  // every term depends only on its own index i
    double res = 0;
    // `i < end` also terminates if a caller passes begin > end.
    for (size_t i = begin; i < end; ++i)
    {
        const double exponent = static_cast<double>(2 * i + 1);
        // Alternating sign taken from the parity of i; this avoids unsigned
        // arithmetic such as `i - 1`, which wraps around at i == 0.
        const double sign = (i % 2 == 0) ? 1.0 : -1.0;
        // (2i+1)! == tgamma(2i+2). Working in double keeps the large
        // factorials representable where integer types would overflow.
        res += sign * std::pow(x, exponent) / std::tgamma(exponent + 1.0);
    }
    return res;
}
run_hpx([](){
    // Compute the Taylor series sin(2.0) for 25 iterations
    size_t n = 25;
    // taylor() sums the terms of the half-open index range [begin, end), so
    // the second partition has to start exactly where the first one stops;
    // starting at (n / 2) + 1 would silently drop the term with index n / 2.
    const size_t half = n / 2;
    // Launch two concurrent computations of each partial result
    hpx::future<double> f1 = hpx::async(taylor, 0, half, n, 2.);
    hpx::future<double> f2 = hpx::async(taylor, half, n, n, 2.);
    // get() waits for each task, so the sum is formed only after both
    // partial results are available
    double res = f1.get() + f2.get();
    // Print the result
    std::cout << "Sin(2.) = " << res << std::endl;
});
Links:
We want to compute the sum of all elements in some `std::vector` `n`, both sequentially and in parallel, using the C++ Standard Template Library without using any `for` loop.
Links:
// Number of elements to sum.
size_t len = 100'000'000;
// Receives the total of the hand-written summation below.
int result = 0;
std::vector<int> n(len);
// Baseline with explicit loops: set every element to -1 ...
for (int& element : n)
    element = -1;
// ... and add the elements up one by one.
for (const int element : n)
    result += element;
std::cout << "Result= " << result << std::endl;
.expr
// Same summation expressed with standard algorithms instead of loops.
std::vector<int> n2 = std::vector<int>(len);
std::fill(n2.begin(),n2.end(),-1);
// The type of the initial value is the type std::accumulate sums in. The
// int literal 0 keeps the whole summation in integer arithmetic; 0.0 would
// add in double and convert back to int on assignment.
result = std::accumulate(n2.begin(),n2.end(),0);
std::cout << "Result= " << result << std::endl;
%%writefile parallel.cpp
#include <algorithm>
#include <cstdlib>
#include <execution>
#include <iostream>
#include <numeric>
#include <vector>
// Entry point: sums a large vector of -1 values with the parallel overload
// of std::reduce and prints the total.
int main()
{
    const size_t len = 1000000000;
    // Construct the vector already holding -1 in every element. Creating it
    // zero-initialised and filling it afterwards would write the whole
    // buffer twice.
    const std::vector<int> n(len, -1);
    // Without an explicit initial value std::reduce starts from a
    // value-initialised element, i.e. int 0.
    const int result = std::reduce(std::execution::par, n.begin(), n.end());
    std::cout << "Result= " << result << std::endl;
    return EXIT_SUCCESS;
}
GCC's implementation of the parallel algorithms is based on the Threading Building Blocks (TBB) library. Therefore, we need to pass `-ltbb` to the compiler. Since these features are experimental, we need to use the following C++ standard: `-std=c++1z`.
%%bash
# Libraries are listed after the source files that use them: the linker
# processes its inputs in order, so -ltbb placed before parallel.cpp can
# leave the TBB symbols unresolved.
g++ -std=c++1z parallel.cpp -ltbb -o parallel
%%bash
./parallel
- `std::reduce(std::execution::par, n.begin(), n.end());` - Parallel execution
- `std::reduce(std::execution::seq, n.begin(), n.end());` - Sequential execution
- `std::reduce(std::execution::par_unseq, n.begin(), n.end());` - Parallel execution with vectorization

For more details: CppCon 2016: Bryce Adelstein Lelbach “The C++17 Parallel Algorithms Library and Beyond”
std::vector
#include<hpx/include/parallel_reduce.hpp>
run_hpx([](){
    // Sum the elements of n, starting from 0, with the parallel policy.
    const auto parallel_sum =
        hpx::ranges::reduce(hpx::execution::par, n.begin(), n.end(), 0);
    std::cout << "Result:" << parallel_sum << std::endl;

    // Same reduction with the sequential policy.
    const auto sequential_sum =
        hpx::ranges::reduce(hpx::execution::seq, n.begin(), n.end(), 0);
    std::cout << "Result:" << sequential_sum << std::endl;
});
run_hpx([](){
    // Combining par with the task policy: the call hands back an object
    // whose get() is used below to obtain the sum (presumably a future that
    // get() waits on -- confirm against the HPX documentation).
    auto pending_sum = hpx::ranges::reduce(
        hpx::execution::par(hpx::execution::task), n.begin(), n.end(), 0);
    std::cout << pending_sum.get();
});
std::vector
%%writefile loop.cpp
#include<execution>
#include<iostream>
#include<vector>
#include<numeric>
#include<algorithm>
#include <ctime>
#include <experimental/random>
// Entry point: fills a vector with pseudo-random numbers and prints every
// element together with its index using std::for_each with the parallel
// execution policy.
int main()
{
    std::vector<int> values(10);
    // Seed std::rand from the current time.
    std::srand(std::time(nullptr));
    std::generate(values.begin(), values.end(), std::rand);

    // Index sequence 0, 1, ..., 9 that the algorithm iterates over.
    std::vector<int> indices(10);
    std::iota(indices.begin(), indices.end(), 0);

    // Prints one element; `values` is captured by reference.
    const auto print_element = [&](auto&& item)
    {
        std::cout << "Element: " << values[item] << " at index: " << item << std::endl;
    };
    std::for_each(std::execution::par, indices.begin(), indices.end(), print_element);

    return EXIT_SUCCESS;
}
%%bash
g++ loop.cpp -std=c++1z -ltbb -o loop
%%bash
./loop
#include<hpx/include/parallel_for_loop.hpp>
#include <cstdlib>
// Ten-element vector read by the HPX loop that follows; it keeps the name l
// because that loop refers to it.
std::vector<int> l(10);
// Seed std::rand from the current time, then fill l with random values.
std::srand(time(nullptr));
std::generate(std::begin(l), std::end(l), std::rand);
run_hpx([](){
    // Index-based loop from 0 up to l.size(), run with the parallel policy.
    hpx::for_loop(
        hpx::execution::par,
        0,
        l.size(),
        // std::size_t is the type l.size() returns and is provided by the
        // <cstdlib> header this cell includes; boost::uint64_t would need a
        // Boost header that is not included here.
        [](std::size_t i)
        {
            std::cout << "Element: " << l[i] << " at index: " << i << std::endl;
        }
    );
});