[clang] [llvm] [OpenMP] Fix runtime problem when explicit map both pointer and pointee. (PR #92210)
via cfe-commits
cfe-commits at lists.llvm.org
Tue May 14 21:35:24 PDT 2024
https://github.com/jyu2-git updated https://github.com/llvm/llvm-project/pull/92210
>From 6afac123ae1a548a788d05bbf0a8add74f5e0cdc Mon Sep 17 00:00:00 2001
From: Jennifer Yu <jennifer.yu at intel.com>
Date: Tue, 14 May 2024 17:31:56 -0700
Subject: [PATCH 1/2] [OpenMP] Fix runtime problem when explicit map both
pointer and pointee.
For ponter int *p for following map, test currently crash.
map(p, p[:100]) or map(p, p[1])
Currly IR looks like
// &p, &p, sizeof(int), TARGET_PARAM | TO | FROM
// &p, p[0], 100sizeof(float) TO | FROM
Worrking IR is
// map(p, p[0:100]) to map(p[0:100])
// &p, &p[0], 100*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
The change is add new argument AreBothBasePtrAndPteeMapped in
generateInfoForComponentList
Use that to skip map for map(p), when processing map(p[:100])
generate map with right flag.
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 37 +++--
...arget_map_both_pointer_pointee_codegen.cpp | 150 ++++++++++++++++++
.../test/mapping/map_both_pointer_pointee.c | 46 ++++++
3 files changed, 224 insertions(+), 9 deletions(-)
create mode 100644 clang/test/OpenMP/target_map_both_pointer_pointee_codegen.cpp
create mode 100644 offload/test/mapping/map_both_pointer_pointee.c
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index e39c7c58d2780..f56af318ff6ae 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -6830,7 +6830,8 @@ class MappableExprsHandler {
const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
- OverlappedElements = std::nullopt) const {
+ OverlappedElements = std::nullopt,
+ bool AreBothBasePtrAndPteeMapped = false) const {
// The following summarizes what has to be generated for each map and the
// types below. The generated information is expressed in this order:
// base pointer, section pointer, size, flags
@@ -7006,6 +7007,10 @@ class MappableExprsHandler {
// &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
// (*) the struct this entry pertains to is the 4th element in the list
// of arguments, hence MEMBER_OF(4)
+ //
+ // map(p, p[:100])
+ // ===> map(p[:100])
+ // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM
// Track if the map information being generated is the first for a capture.
bool IsCaptureFirstInfo = IsFirstComponentList;
@@ -7029,6 +7034,8 @@ class MappableExprsHandler {
const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
+ if (AreBothBasePtrAndPteeMapped && std::next(I) == CE)
+ return;
if (isa<MemberExpr>(AssocExpr)) {
// The base is the 'this' pointer. The content of the pointer is going
// to be the base of the field being mapped.
@@ -7071,8 +7078,9 @@ class MappableExprsHandler {
// can be associated with the combined storage if shared memory mode is
// active or the base declaration is not global variable.
const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
- if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
- !VD || VD->hasLocalStorage())
+ if (!AreBothBasePtrAndPteeMapped &&
+ (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
+ !VD || VD->hasLocalStorage()))
BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
else
FirstPointerInComplexData = true;
@@ -7394,11 +7402,13 @@ class MappableExprsHandler {
// same expression except for the first one. We also need to signal
// this map is the first one that relates with the current capture
// (there is a set of entries for each capture).
- OpenMPOffloadMappingFlags Flags = getMapTypeBits(
- MapType, MapModifiers, MotionModifiers, IsImplicit,
- !IsExpressionFirstInfo || RequiresReference ||
- FirstPointerInComplexData || IsMemberReference,
- IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
+ OpenMPOffloadMappingFlags Flags =
+ getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
+ !IsExpressionFirstInfo || RequiresReference ||
+ FirstPointerInComplexData || IsMemberReference,
+ AreBothBasePtrAndPteeMapped ||
+ (IsCaptureFirstInfo && !RequiresReference),
+ IsNonContiguous);
if (!IsExpressionFirstInfo || IsMemberReference) {
// If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
@@ -8492,6 +8502,8 @@ class MappableExprsHandler {
assert(CurDir.is<const OMPExecutableDirective *>() &&
"Expect a executable directive");
const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
+ bool HasMapBasePtr = false;
+ bool HasMapArraySec = false;
for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
const auto *EI = C->getVarRefs().begin();
for (const auto L : C->decl_component_lists(VD)) {
@@ -8503,6 +8515,11 @@ class MappableExprsHandler {
assert(VDecl == VD && "We got information for the wrong declaration??");
assert(!Components.empty() &&
"Not expecting declaration with no component lists.");
+ if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))
+ HasMapBasePtr = true;
+ if (VD && E && VD->getType()->isAnyPointerType() &&
+ (isa<ArraySectionExpr>(E) || isa<ArraySubscriptExpr>(E)))
+ HasMapArraySec = true;
DeclComponentLists.emplace_back(Components, C->getMapType(),
C->getMapTypeModifiers(),
C->isImplicit(), Mapper, E);
@@ -8685,7 +8702,9 @@ class MappableExprsHandler {
MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
- /*ForDeviceAddr=*/false, VD, VarRef);
+ /*ForDeviceAddr=*/false, VD, VarRef,
+ /*OverlappedElements*/ std::nullopt,
+ HasMapBasePtr && HasMapArraySec);
IsFirstComponentList = false;
}
}
diff --git a/clang/test/OpenMP/target_map_both_pointer_pointee_codegen.cpp b/clang/test/OpenMP/target_map_both_pointer_pointee_codegen.cpp
new file mode 100644
index 0000000000000..e2c27f37f5b9d
--- /dev/null
+++ b/clang/test/OpenMP/target_map_both_pointer_pointee_codegen.cpp
@@ -0,0 +1,150 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+extern void *malloc (int __size) throw () __attribute__ ((__malloc__));
+
+void foo() {
+ int *ptr = (int *) malloc(3 * sizeof(int));
+
+ #pragma omp target map(ptr, ptr[0:2])
+ {
+ ptr[1] = 6;
+ }
+ #pragma omp target map(ptr, ptr[2])
+ {
+ ptr[2] = 8;
+ }
+}
+#endif
+// CHECK-LABEL: define {{[^@]+}}@_Z3foov
+// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[PTR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK-NEXT: [[CALL:%.*]] = call noalias noundef ptr @_Z6malloci(i32 noundef signext 12) #[[ATTR3:[0-9]+]]
+// CHECK-NEXT: store ptr [[CALL]], ptr [[PTR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[PTR]], ptr [[TMP2]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[ARRAYIDX]], ptr [[TMP3]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK-NEXT: store ptr null, ptr [[TMP4]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK-NEXT: store i32 3, ptr [[TMP7]], align 4
+// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK-NEXT: store i32 1, ptr [[TMP8]], align 4
+// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8
+// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
+// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8
+// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8
+// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK-NEXT: store ptr null, ptr [[TMP13]], align 8
+// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK-NEXT: store ptr null, ptr [[TMP14]], align 8
+// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK-NEXT: store i64 0, ptr [[TMP15]], align 8
+// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK-NEXT: store i64 0, ptr [[TMP16]], align 8
+// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP17]], align 4
+// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP18]], align 4
+// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK-NEXT: store i32 0, ptr [[TMP19]], align 4
+// CHECK-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15.region_id, ptr [[KERNEL_ARGS]])
+// CHECK-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0
+// CHECK-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK: omp_offload.failed:
+// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15(ptr [[TMP0]]) #[[ATTR3]]
+// CHECK-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CHECK: omp_offload.cont:
+// CHECK-NEXT: [[TMP22:%.*]] = load ptr, ptr [[PTR]], align 8
+// CHECK-NEXT: [[TMP23:%.*]] = load ptr, ptr [[PTR]], align 8
+// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 2
+// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[PTR]], ptr [[TMP24]], align 8
+// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[ARRAYIDX1]], ptr [[TMP25]], align 8
+// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 0
+// CHECK-NEXT: store ptr null, ptr [[TMP26]], align 8
+// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
+// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
+// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
+// CHECK-NEXT: store i32 3, ptr [[TMP29]], align 4
+// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
+// CHECK-NEXT: store i32 1, ptr [[TMP30]], align 4
+// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
+// CHECK-NEXT: store ptr [[TMP27]], ptr [[TMP31]], align 8
+// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
+// CHECK-NEXT: store ptr [[TMP28]], ptr [[TMP32]], align 8
+// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
+// CHECK-NEXT: store ptr @.offload_sizes.1, ptr [[TMP33]], align 8
+// CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
+// CHECK-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP34]], align 8
+// CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
+// CHECK-NEXT: store ptr null, ptr [[TMP35]], align 8
+// CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
+// CHECK-NEXT: store ptr null, ptr [[TMP36]], align 8
+// CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
+// CHECK-NEXT: store i64 0, ptr [[TMP37]], align 8
+// CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9
+// CHECK-NEXT: store i64 0, ptr [[TMP38]], align 8
+// CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10
+// CHECK-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP39]], align 4
+// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11
+// CHECK-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP40]], align 4
+// CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12
+// CHECK-NEXT: store i32 0, ptr [[TMP41]], align 4
+// CHECK-NEXT: [[TMP42:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l19.region_id, ptr [[KERNEL_ARGS5]])
+// CHECK-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
+// CHECK-NEXT: br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
+// CHECK: omp_offload.failed6:
+// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l19(ptr [[TMP22]]) #[[ATTR3]]
+// CHECK-NEXT: br label [[OMP_OFFLOAD_CONT7]]
+// CHECK: omp_offload.cont7:
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15
+// CHECK-SAME: (ptr noundef [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1
+// CHECK-NEXT: store i32 6, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l19
+// CHECK-SAME: (ptr noundef [[PTR:%.*]]) #[[ATTR2]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2
+// CHECK-NEXT: store i32 8, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: ret void
+//
diff --git a/offload/test/mapping/map_both_pointer_pointee.c b/offload/test/mapping/map_both_pointer_pointee.c
new file mode 100644
index 0000000000000..c23d218b90429
--- /dev/null
+++ b/offload/test/mapping/map_both_pointer_pointee.c
@@ -0,0 +1,46 @@
+// RUN: %libomptarget-compilexx-run-and-check-aarch64-unknown-linux-gnu
+// RUN: %libomptarget-compilexx-run-and-check-powerpc64-ibm-linux-gnu
+// RUN: %libomptarget-compilexx-run-and-check-powerpc64le-ibm-linux-gnu
+// RUN: %libomptarget-compilexx-run-and-check-x86_64-pc-linux-gnu
+// RUN: %libomptarget-compilexx-run-and-check-nvptx64-nvidia-cuda
+
+// REQUIRES: unified_shared_memory
+// UNSUPPORTED: amdgcn-amd-amdhsa
+
+#pragma omp declare target
+int *ptr1;
+#pragma omp end declare target
+
+#include <stdio.h>
+#include <stdlib.h>
+int main() {
+ ptr1 = (int *)malloc(sizeof(int) * 100);
+ int *ptr2;
+ ptr2 = (int *)malloc(sizeof(int) * 100);
+#pragma omp target map(ptr1, ptr1[ : 100])
+ {
+ ptr1[1] = 6;
+ }
+ // CHECK: 6
+ printf(" %d \n", ptr1[1]);
+#pragma omp target data map(ptr1[ : 5])
+ {
+#pragma omp target map(ptr1[2], ptr1, ptr1[3]) map(ptr2, ptr2[2])
+ {
+ ptr1[2] = 7;
+ ptr1[3] = 9;
+ ptr2[2] = 7;
+ }
+ }
+ // CHECK: 7 7 9
+ printf(" %d %d %d \n", ptr2[2], ptr1[2], ptr1[3]);
+ free(ptr1);
+#pragma omp target map(ptr2, ptr2[ : 100])
+ {
+ ptr2[1] = 6;
+ }
+ // CHECK: 6
+ printf(" %d \n", ptr2[1]);
+ free(ptr2);
+ return 0;
+}
>From f2781e7d2aac82dcc85032279a6fed0f44c0aab3 Mon Sep 17 00:00:00 2001
From: Jennifer Yu <jennifer.yu at intel.com>
Date: Tue, 14 May 2024 21:34:57 -0700
Subject: [PATCH 2/2] Fix format problem.
---
offload/test/mapping/map_both_pointer_pointee.c | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/offload/test/mapping/map_both_pointer_pointee.c b/offload/test/mapping/map_both_pointer_pointee.c
index c23d218b90429..4b724823e7a40 100644
--- a/offload/test/mapping/map_both_pointer_pointee.c
+++ b/offload/test/mapping/map_both_pointer_pointee.c
@@ -18,9 +18,7 @@ int main() {
int *ptr2;
ptr2 = (int *)malloc(sizeof(int) * 100);
#pragma omp target map(ptr1, ptr1[ : 100])
- {
- ptr1[1] = 6;
- }
+ { ptr1[1] = 6; }
// CHECK: 6
printf(" %d \n", ptr1[1]);
#pragma omp target data map(ptr1[ : 5])
@@ -36,9 +34,7 @@ int main() {
printf(" %d %d %d \n", ptr2[2], ptr1[2], ptr1[3]);
free(ptr1);
#pragma omp target map(ptr2, ptr2[ : 100])
- {
- ptr2[1] = 6;
- }
+ { ptr2[1] = 6; }
// CHECK: 6
printf(" %d \n", ptr2[1]);
free(ptr2);
More information about the cfe-commits
mailing list