Open
Description
I'm trying to parallellize a for loop in which I sum submatrices to a given matrix. That is, I would like to do something like
#include <iostream>
#include <xtensor/xarray.hpp>
#include <xtensor/xview.hpp>
#include <xtensor/xio.hpp>
#include <tbb/parallel_for.h>
#include <tbb/combinable.h>
int main()
{
std::size_t Nx{ 10 }, Ny{ 12 }, N{ 10000 };
tbb::combinable<xt::xarray<double>> mat_(xt::zeros<double>({ Nx, Ny }));
tbb::parallel_for(
tbb::blocked_range<int>(0, N),
[&](tbb::blocked_range<int> r) {
for (int i = r.begin(); i < r.end(); ++i)
{
int ix_0{ 2 };
int iy_0{ 4 };
int ix_1{ 5 };
int iy_1{ 8 };
xt::view(mat_.local(), xt::range(ix_0, ix_1), xt::range(iy_0, iy_1)) += 0.1 * xt::ones<double>({ ix_1 - ix_0, iy_1 - iy_0 });
}
}
);
auto mat = mat_.combine([](const xt::xarray<double>& x, const xt::xarray<double>& y) {return x + y; });
std::cout << mat;
return 0;
}
Of course, in my actual code the views and the rhs
of the +=
change over the iterations. The small example above fails to execute, and in my actual code the failure flags an 'heap corruption error'.
I have a workaround, but this requires a new initialization for every kernel:
#include <iostream>
#include <xtensor/xarray.hpp>
#include <xtensor/xview.hpp>
#include <xtensor/xio.hpp>
#include <tbb/parallel_for.h>
#include <tbb/combinable.h>
int main()
{
std::size_t Nx{ 10 }, Ny{ 12 }, N{ 10000 };
tbb::combinable<xt::xarray<double>> mat_(xt::zeros<double>({ Nx, Ny }));
tbb::parallel_for(
tbb::blocked_range<int>(0, N),
[&](tbb::blocked_range<int> r) {
xt::xarray<double> mat_temp = xt::zeros<double>({ Nx, Ny });
for (int i = r.begin(); i < r.end(); ++i)
{
int ix_0{ 2 };
int iy_0{ 4 };
int ix_1{ 5 };
int iy_1{ 8 };
xt::view(mat_temp, xt::range(ix_0, ix_1), xt::range(iy_0, iy_1)) += 0.1 * xt::ones<double>({ ix_1 - ix_0, iy_1 - iy_0 });
}
mat_.local() += mat_temp;
}
);
auto mat = mat_.combine([](const xt::xarray<double>& x, const xt::xarray<double>& y) {return x + y; });
std::cout << mat;
return 0;
}
I'm not sure the tbb parallellized assignment of xtensor, i.e. the code below, is doing what I want...
#define XTENSOR_USE_TBB
#include <iostream>
#include <xtensor/xarray.hpp>
#include <xtensor/xview.hpp>
#include <xtensor/xio.hpp>
int main()
{
std::size_t Nx{ 10 }, Ny{ 12 }, N{ 10000 };
xt::xarray<double>mat{ xt::zeros<double>({ Nx, Ny }) };
for (std::size_t i = 0; i < N; ++i)
{
int ix_0{ 2 };
int iy_0{ 4 };
int ix_1{ 5 };
int iy_1{ 8 };
xt::view(mat, xt::range(ix_0, ix_1), xt::range(iy_0, iy_1)) += 0.1 * xt::ones<double>({ ix_1 - ix_0, iy_1 - iy_0 });
}
std::cout << mat;
return 0;
}
Could you perhaps indicate the right way of doing what I want to do?