[Openmp-commits] [PATCH] D94745: [OpenMP][WIP] Build the deviceRTLs with OpenMP instead of target dependent language - NOT FOR REVIEW

Wed Jan 20 14:51:06 PST 2021

JonChesterfield added a comment.

Tried:

  #include <stdint.h>

  #pragma omp declare target

  template <typename T> T atomicInc(T *, T);

  // First attempt: captures the current value at Address into Old, then adds
  // either 0 (when Old >= Val) or 1 (otherwise) to *Address, all inside one
  // `omp atomic capture` block. Returns the captured value.
  //
  // NOTE(review): the update expression reads Old, which is only assigned by
  // the first statement of this same capture block. If the compiler evaluates
  // the right-hand side before performing the capture, Old is still
  // uninitialized at that point -- TODO confirm against the OpenMP rules for
  // what `expr` in an atomic capture may reference.
  uint32_t __kmpc_atomic_inc_omp(uint32_t *Address, uint32_t Val) {
    uint32_t Old;
  #pragma omp atomic capture
    {
      Old = *Address;
      // Parsed as: *Address += ((Old >= Val) ? 0 : 1)
      *Address += Old >= Val ? 0 : 1;
    }
    return Old;
  }

  // Second attempt: captures the current value at Address into Old, then
  // overwrites *Address with 0 (when Old >= Val) or Old + 1 (otherwise),
  // inside one `omp atomic capture` block. Returns the captured value.
  //
  // NOTE(review): this is the plain-assignment form (`x = expr`) rather than
  // a read-modify-write form, and expr again depends on Old, which is only
  // written inside this same block. Presumably expr is computed before the
  // atomic operation, i.e. from an uninitialized Old -- TODO confirm.
  uint32_t __kmpc_atomic_inc_omp2(uint32_t *Address, uint32_t Val) {
    uint32_t Old;
  #pragma omp atomic capture
    {
      Old = *Address;
      *Address = ((Old >= Val) ? 0 : (Old+1));
    }
    return Old;
  }

  #pragma omp end declare target

Got:

  target triple = "nvptx64-nvidia-cuda"

  ; Function Attrs: nofree norecurse nounwind
  ; NOTE(review): annotation, not compiler output. The body is a single
  ; monotonic atomic load of *%Address whose result is returned. There is no
  ; store or atomicrmw, %Address is marked readonly, and %Val is never used.
  define hidden i32 @_Z21__kmpc_atomic_inc_ompPjj(i32* nocapture readonly %Address, i32 %Val) local_unnamed_addr #0 {
  entry:
    %0 = load atomic i32, i32* %Address monotonic, align 4
    ret i32 %0
  }

  ; Function Attrs: nofree norecurse nounwind
  ; NOTE(review): annotation, not compiler output. The body is a single
  ; monotonic atomic exchange that writes the constant 0 to *%Address and
  ; returns the previous value. %Val is never used, so no comparison against
  ; it survives in the emitted code.
  define hidden i32 @_Z22__kmpc_atomic_inc_omp2Pjj(i32* nocapture %Address, i32 %Val) local_unnamed_addr #0 {
  entry:
    %0 = atomicrmw xchg i32* %Address, i32 0 monotonic
    ret i32 %0
  }

Neither looks right to me. Exchange with zero isn't an increment, and neither is a load.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D94745/new/

https://reviews.llvm.org/D94745