[Openmp-commits] [openmp] fe29a16 - This is to fix runtime problem for member data used in target region.

Jennifer Yu via Openmp-commits openmp-commits at lists.llvm.org
Tue Jan 10 17:14:35 PST 2023


Author: Jennifer Yu
Date: 2023-01-10T16:59:49-08:00
New Revision: fe29a1695a6c69eb6616db01a559a3804d55fde8

URL: https://github.com/llvm/llvm-project/commit/fe29a1695a6c69eb6616db01a559a3804d55fde8
DIFF: https://github.com/llvm/llvm-project/commit/fe29a1695a6c69eb6616db01a559a3804d55fde8.diff

LOG: This is to fix runtime problem for member data used in target region.

The problem is happened when base class member field is used in target
region , the size is wrong, cause runtime to fail. Currently the size of
calculation is depended on index of field, since field is in base class,
the calculation is wrong.

According OpenMP 5.2 148:21:
If the target construct is within a class non-static member function,
and a variable is an accessible data member of the object for which the
non-static data member function is invoked, the variable is treated as
if the this[:1] expression had appeared in a map clause with a map-type
of tofrom.

One way to fix this is emitting code to generate this[:1] instead only
when class has any base class.

Differential Revision: https://reviews.llvm.org/D141350

Added: 
    clang/test/OpenMP/target_map_member_expr_codegen.cpp
    openmp/libomptarget/test/mapping/target_map_for_member_data.cpp

Modified: 
    clang/lib/CodeGen/CGOpenMPRuntime.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index c0893d70340d9..c2328d28ec50a 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -8444,19 +8444,39 @@ class MappableExprsHandler {
     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
     // Pointer is the address of the lowest element
     llvm::Value *LB = LBAddr.getPointer();
-    CombinedInfo.Pointers.push_back(LB);
+    const CXXMethodDecl *MD =
+        CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
+    const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
+    bool HasBaseClass = RD ? RD->getNumBases() > 0 : false;
     // There should not be a mapper for a combined entry.
+    if (HasBaseClass) {
+      // OpenMP 5.2 148:21:
+      // If the target construct is within a class non-static member function,
+      // and a variable is an accessible data member of the object for which the
+      // non-static data member function is invoked, the variable is treated as
+      // if the this[:1] expression had appeared in a map clause with a map-type
+      // of tofrom.
+      // Emit this[:1]
+      CombinedInfo.Pointers.push_back(PartialStruct.Base.getPointer());
+      QualType Ty = MD->getThisType()->getPointeeType();
+      llvm::Value *Size =
+          CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
+                                    /*isSigned=*/true);
+      CombinedInfo.Sizes.push_back(Size);
+    } else {
+      CombinedInfo.Pointers.push_back(LB);
+      // Size is (addr of {highest+1} element) - (addr of lowest element)
+      llvm::Value *HB = HBAddr.getPointer();
+      llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
+          HBAddr.getElementType(), HB, /*Idx0=*/1);
+      llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
+      llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
+      llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
+      llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
+                                                    /*isSigned=*/false);
+      CombinedInfo.Sizes.push_back(Size);
+    }
     CombinedInfo.Mappers.push_back(nullptr);
-    // Size is (addr of {highest+1} element) - (addr of lowest element)
-    llvm::Value *HB = HBAddr.getPointer();
-    llvm::Value *HAddr =
-        CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
-    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
-    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
-    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
-    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
-                                                  /*isSigned=*/false);
-    CombinedInfo.Sizes.push_back(Size);
     // Map type is always TARGET_PARAM, if generate info for captures.
     CombinedInfo.Types.push_back(
         NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE

diff  --git a/clang/test/OpenMP/target_map_member_expr_codegen.cpp b/clang/test/OpenMP/target_map_member_expr_codegen.cpp
new file mode 100644
index 0000000000000..2d9777197a791
--- /dev/null
+++ b/clang/test/OpenMP/target_map_member_expr_codegen.cpp
@@ -0,0 +1,122 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=x86_64-pc-linux-gnu  \
+// RUN:  -x c++ -triple x86_64-unknown-linux-gnu -emit-llvm %s -o - \
+// RUN:   | FileCheck %s
+
+// expected-no-diagnostics
+
+// CHECK: @.offload_sizes = private unnamed_addr constant [4 x i64] [i64 12, i64 4, i64 4, i64 4]
+// CHECK-NOT: @.offload_sizes = private unnamed_addr constant [4 x i64] [i64 0, i64 4, i64 4, i64 4]
+
+// CHECK-LABEL: define {{[^@]+}}@_Z3foov(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[B:%.*]] = alloca [[CLASS_B:%.*]], align 4
+// CHECK-NEXT:    call void @_ZN1BC1Eii(ptr noundef nonnull align 4 dereferenceable(12) [[B]], i32 noundef 2, i32 noundef 3)
+// CHECK-NEXT:    call void @_ZN1B3runEv(ptr noundef nonnull align 4 dereferenceable(12) [[B]])
+// CHECK-NEXT:    ret void
+//
+class A {
+protected:
+  int X;
+  int Y;
+
+public:
+  A (int x, int y) : X { x }, Y { y } { };
+};
+
+class B : public A {
+  using A::X;
+  using A::Y;
+public:
+  int res;
+// CHECK-LABEL: define {{[^@]+}}@_ZN1BC1Eii(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[Y_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT:    store i32 [[X:%.*]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[Y:%.*]], ptr [[Y_ADDR]], align 4
+// CHECK-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[Y_ADDR]], align 4
+// CHECK-NEXT:    call void @_ZN1BC2Eii(ptr noundef nonnull align 4 dereferenceable(12) [[THIS1]], i32 noundef [[TMP0]], i32 noundef [[TMP1]])
+// CHECK-NEXT:    ret void
+//
+  B (int x, int y) : A(x,y), res{0} {}
+// CHECK-LABEL: define {{[^@]+}}@_ZN1B3runEv(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 8
+// CHECK-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
+// CHECK-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
+// CHECK-NEXT:    store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK-NOT:     sdiv exact i64 {{.*}}, ptrtoint
+// CHECK-NEXT:    [[RES:%.*]] = getelementptr inbounds [[CLASS_B:%.*]], ptr [[THIS1]], i32 0, i32 1
+// CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds [[CLASS_A:%.*]], ptr [[THIS1]], i32 0, i32 0
+// CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds [[CLASS_A]], ptr [[THIS1]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-NEXT:    store ptr [[THIS1]], ptr [[TMP0]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NEXT:    store ptr [[THIS1]], ptr [[TMP1]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK-NEXT:    store ptr null, ptr [[TMP2]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK-NEXT:    store ptr [[THIS1]], ptr [[TMP3]], align 8
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK-NEXT:    store ptr [[RES]], ptr [[TMP4]], align 8
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK-NEXT:    store ptr null, ptr [[TMP5]], align 8
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK-NEXT:    store ptr [[THIS1]], ptr [[TMP6]], align 8
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK-NEXT:    store ptr [[X]], ptr [[TMP7]], align 8
+// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK-NEXT:    store ptr null, ptr [[TMP8]], align 8
+// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK-NEXT:    store ptr [[THIS1]], ptr [[TMP9]], align 8
+// CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK-NEXT:    store ptr [[Y]], ptr [[TMP10]], align 8
+// CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK-NEXT:    store ptr null, ptr [[TMP11]], align 8
+// CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK-NEXT:    store i32 1, ptr [[TMP14]], align 4
+// CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK-NEXT:    store i32 4, ptr [[TMP15]], align 4
+// CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK-NEXT:    store ptr [[TMP12]], ptr [[TMP16]], align 8
+// CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK-NEXT:    store ptr [[TMP13]], ptr [[TMP17]], align 8
+// CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK-NEXT:    store ptr @.offload_sizes, ptr [[TMP18]], align 8
+// CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK-NEXT:    store ptr @.offload_maptypes, ptr [[TMP19]], align 8
+// CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK-NEXT:    store ptr null, ptr [[TMP20]], align 8
+// CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK-NEXT:    store ptr null, ptr [[TMP21]], align 8
+// CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK-NEXT:    store i64 0, ptr [[TMP22]], align 8
+// CHECK-NEXT:    [[TMP23:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.__omp_offloading_{{.*}}__ZN1B3runEv_{{.*}}.region_id, ptr [[KERNEL_ARGS]])
+// CHECK-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
+// CHECK-NEXT:    br i1 [[TMP24]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK:       omp_offload.failed:
+// CHECK-NEXT:    call void @__omp_offloading_{{.*}}__ZN1B3runEv_{{.*}}(ptr [[THIS1]]) #[[ATTR3:[0-9]+]]
+// CHECK-NEXT:    br label [[OMP_OFFLOAD_CONT]]
+// CHECK:       omp_offload.cont:
+// CHECK-NEXT:    ret void
+//
+  void run (void) {
+  #pragma omp target
+     res = X + Y;
+  }
+};
+
+void foo() {
+  B b(2, 3);
+  b.run();
+}

diff  --git a/openmp/libomptarget/test/mapping/target_map_for_member_data.cpp b/openmp/libomptarget/test/mapping/target_map_for_member_data.cpp
new file mode 100644
index 0000000000000..5a38e40fff817
--- /dev/null
+++ b/openmp/libomptarget/test/mapping/target_map_for_member_data.cpp
@@ -0,0 +1,68 @@
+// RUN: %libomptarget-compile-generic -fopenmp-version=51
+// RUN: %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic
+
+extern "C" int printf(const char *, ...);
+template <typename T> class A {
+protected:
+  T X;
+  T Y;
+
+public:
+  A(T x, T y) : X{x}, Y{y} {};
+};
+
+template <typename T> class B : public A<T> {
+  using A<T>::X;
+  using A<T>::Y;
+
+public:
+  T res;
+
+  B(T x, T y) : A<T>(x, y), res{0} {};
+
+  void run(void) {
+#pragma omp target map(res)
+    { res = X + Y; }
+  }
+};
+
+class X {
+protected:
+  int A;
+
+public:
+  X(int a) : A{a} {};
+};
+class Y : public X {
+  using X::A;
+
+protected:
+  int B;
+
+public:
+  Y(int a, int b) : X(a), B{b} {};
+};
+class Z : public Y {
+  using X::A;
+  using Y::B;
+
+public:
+  int res;
+  Z(int a, int b) : Y(a, b), res{0} {};
+  void run(void) {
+#pragma omp target map(res)
+    { res = A + B; }
+  }
+};
+
+int main(int argc, char *argv[]) {
+  B<int> b(2, 3);
+  b.run();
+  // CHECK: 5
+  printf("b.res = %d \n", b.res);
+  Z c(2, 3);
+  c.run();
+  // CHECK: 5
+  printf("c.res = %d \n", c.res);
+}


        


More information about the Openmp-commits mailing list