[Openmp-dev] [libomptarget] Data corruption on target nowait with more than 4 threads
Guilherme Valarini via Openmp-dev
openmp-dev at lists.llvm.org
Fri Feb 12 09:39:43 PST 2021
Hello everyone,
I'm having some data corruption issues when using the generic-elf plugin on
the program below (blocked matrix multiplication). I tried to use 3 builds
to test this program: the release branches "release/11.x" and
"release/12.x", and the main branch as well. I observed the following
behavior:
- release/11.x & main: the program works correctly with up to 4 OpenMP
threads (OMP_NUM_THREADS=4), but with any number higher than that the
result of the operation becomes incorrect. I believe that the problem may
also happen with 2-4 threads, only with a lower likelihood (none of 500
executions showed the problem);
- release/12.x: the program crashes due to a segfault inside a function
called "__kmp_push_task" from OpenMP runtime regardless of the number of
threads.
The program was compiled with the following command after setting the
environment variables to point to the correct clang build:
"clang++ -fopenmp -fopenmp-targets=x86_64-pc-linux-gnu BlockMatMul.cpp"
Does anyone know if this is an already known problem (e.g. multiple
parallel mappings happening at the same time)? What about the segfault
in "__kmp_push_task"?
Thanks for the help,
Guilherme Valarini
Here is the program (sorry, I could not come up with a smaller example to
post here). I have dumped the task graph built by OpenMP in
dot/graphviz form, and it seems to be correct, with the intended dependencies
found in the function "BlockMatMul_TargetNowait":
#include <assert.h>
> #include <math.h>
> #include <stdio.h>
> #include <stdlib.h>
> #include <vector>
> #include <sys/time.h>
> #include <time.h>
> #include <unistd.h>
/// Matrix stored as a grid of separately allocated, zero-initialised blocks.
///
/// Block (i, j) is addressed with i in [0, nBlocksPerCol) and j in
/// [0, nBlocksPerRow). Inside a block, element (ii, jj) lives at offset
/// ii + jj * colsPerBlock and mirrors the flat-array element at index
/// (i * colsPerBlock + ii) + (j * rowsPerBlock + jj) * nCols.
///
/// The block counts are computed with integer division, so both block sizes
/// must be non-zero and any rows/columns beyond the last full block are not
/// represented.
///
/// Storage is owned by std::vector: it is released automatically and copying
/// a BlockMatrix deep-copies the data. Pointers returned by GetBlock() point
/// into that storage and must not be freed by the caller.
class BlockMatrix {
private:
  const int rowsPerBlock;
  const int colsPerBlock;
  const long nRows;
  const long nCols;
  const int nBlocksPerRow; // nRows / rowsPerBlock
  const int nBlocksPerCol; // nCols / colsPerBlock
  // Blocks[i][j] holds the rowsPerBlock * colsPerBlock floats of block (i, j).
  std::vector<std::vector<std::vector<float>>> Blocks;

public:
  /// Allocates every block and fills it with zeros.
  ///
  /// @param _rowsPerBlock rows in one block (must be non-zero).
  /// @param _colsPerBlock columns in one block (must be non-zero).
  /// @param _nRows        rows of the full matrix.
  /// @param _nCols        columns of the full matrix; also the row stride of
  ///                      the flat arrays given to Initialize()/Compare().
  BlockMatrix(const int _rowsPerBlock, const int _colsPerBlock,
              const long _nRows, const long _nCols)
      : rowsPerBlock(_rowsPerBlock), colsPerBlock(_colsPerBlock),
        nRows(_nRows), nCols(_nCols),
        nBlocksPerRow(_nRows / _rowsPerBlock),
        nBlocksPerCol(_nCols / _colsPerBlock) {
    // Multiply in size_t so a large block cannot overflow int.
    const size_t blockElems =
        static_cast<size_t>(_rowsPerBlock) * static_cast<size_t>(_colsPerBlock);
    Blocks.assign(nBlocksPerCol,
                  std::vector<std::vector<float>>(
                      nBlocksPerRow, std::vector<float>(blockElems, 0.0f)));
  }

  /// Copies the contents of a flat array into the blocks.
  ///
  /// @param matrix flat array with row stride nCols; it is only read.
  void Initialize(const float *matrix) {
    for (int i = 0; i < nBlocksPerCol; i++)
      for (int j = 0; j < nBlocksPerRow; j++) {
        float *CurrBlock = GetBlock(i, j);
        for (int ii = 0; ii < colsPerBlock; ++ii)
          for (int jj = 0; jj < rowsPerBlock; ++jj) {
            const int curri = i * colsPerBlock + ii;
            const int currj = j * rowsPerBlock + jj;
            // nCols is long, so the flat index is computed in long.
            CurrBlock[ii + jj * colsPerBlock] = matrix[curri + currj * nCols];
          }
      }
  }

  /// Compares every block element with the matching element of a flat array
  /// using exact float equality, printing each mismatch and a final summary
  /// to stdout.
  ///
  /// @param matrix flat array with row stride nCols; it is only read.
  /// @return the number of mismatching elements.
  long Compare(const float *matrix) {
    long fail = 0;
    for (int i = 0; i < nBlocksPerCol; i++)
      for (int j = 0; j < nBlocksPerRow; j++) {
        const float *CurrBlock = GetBlock(i, j);
        for (int ii = 0; ii < colsPerBlock; ++ii)
          for (int jj = 0; jj < rowsPerBlock; ++jj) {
            const int curri = i * colsPerBlock + ii;
            const int currj = j * rowsPerBlock + jj;
            const float m_value = matrix[curri + currj * nCols];
            const float bm_value = CurrBlock[ii + jj * colsPerBlock];
            if (bm_value != m_value) {
              fprintf(stdout, "i,j = %d,%d\n", i, j);
              fprintf(stdout, "BlockMAT[%d][%d] = %f\n", ii, jj, bm_value);
              fprintf(stdout, "MAT[%d][%d] = %f\n", curri, currj, m_value);
              fail++;
            }
          }
      }
    // Print results
    printf("Non-Matching Block Outputs: %ld\n", fail);
    return fail;
  }

  /// Returns a pointer to the first element of block (i, j).
  /// Asserts that both indices are inside the block grid.
  float *GetBlock(int i, int j) {
    assert(i >= 0 && i < nBlocksPerCol && j >= 0 && j < nBlocksPerRow &&
           "Accessing outside block");
    return Blocks[i][j].data();
  }
};
>
> #define BS 256
> #define N 1024
>
> // Initialize matrices.
> void init(float *a, float *b) {
> int i, j;
> for (i = 0; i < N; ++i) {
> for (j = 0; j < N; ++j) {
> a[i * N + j] = (float)i + j % 100;
> b[i * N + j] = (float)i + j % 100;
> }
> }
> }
> int BlockMatMul_TargetNowait(BlockMatrix &A, BlockMatrix &B, BlockMatrix
> &C) {
> #pragma omp parallel
> #pragma omp master
> for (int i = 0; i < N / BS; ++i)
> for (int j = 0; j < N / BS; ++j) {
> float *BlockC = C.GetBlock(i, j);
> for (int k = 0; k < N / BS; ++k) {
> float *BlockA = A.GetBlock(i, k);
> float *BlockB = B.GetBlock(k,j);
> #pragma omp target depend(in: BlockA[0], BlockB[0]) \
> depend(inout: BlockC[0]) \
> map(to: BlockA[:BS*BS], BlockB[:BS*BS]) \
> map(tofrom: BlockC[:BS*BS]) nowait
> #pragma omp parallel for
> for(int ii = 0; ii < BS; ii++)
> for(int jj = 0; jj < BS; jj++) {
> for(int kk = 0; kk < BS; ++kk)
> BlockC[ii + jj * BS] += BlockA[ii + kk * BS] * BlockB[kk +
> jj * BS];
> }
> }
> }
> return 0;
> }
> void Matmul(float *a, float *b, float *c) {
> for (int i = 0; i < N; ++i) {
> for (int j = 0; j < N; ++j) {
> float sum = 0.0;
> for (int k = 0; k < N; ++k) {
> sum = sum + a[i * N + k] * b[k * N + j];
> }
> c[i * N + j] = sum;
> }
> }
> }
> int main(int argc, char *argv[]) {
> double t_start, t_end;
> int ret = 0;
> float *a = (float *)malloc(sizeof(float) * N * N);
> float *b = (float *)malloc(sizeof(float) * N * N);
> float *c = (float *)calloc(sizeof(float), N * N);
> init(a, b);
> auto BlockedA = BlockMatrix(BS, BS, N, N);
> BlockedA.Initialize(a);
> BlockedA.Compare(a);
> auto BlockedB = BlockMatrix(BS, BS, N, N);
> BlockedB.Initialize(b);
> BlockedB.Compare(b);
> Matmul(a, b, c);
> auto BlockedC = BlockMatrix(BS, BS, N, N);
> BlockMatMul_TargetNowait(BlockedA, BlockedB, BlockedC);
> if(BlockedC.Compare(c) > 0) {
> // exit code to error if there is any missmatch
> ret = 1;
> }
> free(a);
> free(b);
> free(c);
> return ret;
> }
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/openmp-dev/attachments/20210212/60ae76e3/attachment.html>
More information about the Openmp-dev
mailing list