[clang] [OpenMP] Don't emit redundant zero-sized mapping nodes for overlapped structs (PR #148947)
Julian Brown via cfe-commits
cfe-commits at lists.llvm.org
Wed Jul 16 04:59:08 PDT 2025
https://github.com/jtb20 updated https://github.com/llvm/llvm-project/pull/148947
>From c30dcc39a2b14eb0aef51e5f8bb34b7338e587d0 Mon Sep 17 00:00:00 2001
From: Julian Brown <julian.brown at amd.com>
Date: Thu, 19 Jun 2025 07:47:38 -0500
Subject: [PATCH] [OpenMP] Don't emit redundant zero-sized mapping nodes for
overlapped structs
The handling of overlapped structure mapping in CGOpenMPRuntime.cpp can
lead to redundant zero-sized mapping nodes at runtime. This patch fixes
it using a combination of approaches: trivially adjacent struct members
won't have a mapping node created between them, and for more complicated
cases (inheritance) the physical layout of the struct/class is used to
make sure that elements aren't missed.
I've introduced a new class to track the state whilst iterating over
the struct. This reduces a bit of redundancy in the code (accumulating
CombinedInfo both during and after the loop), which I think is a bit
neater.
Before:
omptarget --> Entry 0: Base=0x00007fff8d483830, Begin=0x00007fff8d483830, Size=48, Type=0x20, Name=unknown
omptarget --> Entry 1: Base=0x00007fff8d483830, Begin=0x00007fff8d483830, Size=0, Type=0x1000000000003, Name=unknown
omptarget --> Entry 2: Base=0x00007fff8d483830, Begin=0x00007fff8d483834, Size=0, Type=0x1000000000003, Name=unknown
omptarget --> Entry 3: Base=0x00007fff8d483830, Begin=0x00007fff8d483838, Size=0, Type=0x1000000000003, Name=unknown
omptarget --> Entry 4: Base=0x00007fff8d483830, Begin=0x00007fff8d48383c, Size=20, Type=0x1000000000003, Name=unknown
omptarget --> Entry 5: Base=0x00007fff8d483830, Begin=0x00007fff8d483854, Size=0, Type=0x1000000000003, Name=unknown
omptarget --> Entry 6: Base=0x00007fff8d483830, Begin=0x00007fff8d483858, Size=0, Type=0x1000000000003, Name=unknown
omptarget --> Entry 7: Base=0x00007fff8d483830, Begin=0x00007fff8d48385c, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry 8: Base=0x00007fff8d483830, Begin=0x00007fff8d483830, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry 9: Base=0x00007fff8d483830, Begin=0x00007fff8d483834, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry 10: Base=0x00007fff8d483830, Begin=0x00007fff8d483838, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry 11: Base=0x00007fff8d483840, Begin=0x00005e7665275130, Size=32, Type=0x1000000000013, Name=unknown
omptarget --> Entry 12: Base=0x00007fff8d483830, Begin=0x00007fff8d483850, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry 13: Base=0x00007fff8d483830, Begin=0x00007fff8d483854, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry 14: Base=0x00007fff8d483830, Begin=0x00007fff8d483858, Size=4, Type=0x1000000000003, Name=unknown
After:
omptarget --> Entry 0: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562e0, Size=48, Type=0x20, Name=unknown
omptarget --> Entry 1: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562ec, Size=20, Type=0x1000000000003, Name=unknown
omptarget --> Entry 2: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f5630c, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry 3: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562e0, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry 4: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562e4, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry 5: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562e8, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry 6: Base=0x00007fffd0f562f0, Begin=0x000058b6013fb130, Size=32, Type=0x1000000000013, Name=unknown
omptarget --> Entry 7: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f56300, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry 8: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f56304, Size=4, Type=0x1000000000003, Name=unknown
omptarget --> Entry 9: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f56308, Size=4, Type=0x1000000000003, Name=unknown
For code:
#include <cstdlib>
#include <cstdio>
struct S {
int x;
int y;
int z;
int *p1;
int *p2;
};
struct T : public S {
int a;
int b;
int c;
};
int main() {
T v;
v.p1 = (int*) calloc(8, sizeof(int));
v.p2 = (int*) calloc(8, sizeof(int));
#pragma omp target map(tofrom: v, v.x, v.y, v.z, v.p1[:8], v.a, v.b, v.c)
{
v.x++;
v.y += 2;
v.z += 3;
v.p1[0] += 4;
v.a += 7;
v.b += 5;
v.c += 6;
}
return 0;
}
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 157 ++++++++++++++------
clang/test/OpenMP/copy-gaps-1.cpp | 52 +++++++
clang/test/OpenMP/copy-gaps-2.cpp | 52 +++++++
clang/test/OpenMP/copy-gaps-3.cpp | 46 ++++++
clang/test/OpenMP/copy-gaps-4.cpp | 48 ++++++
clang/test/OpenMP/copy-gaps-5.cpp | 50 +++++++
clang/test/OpenMP/copy-gaps-6.cpp | 87 +++++++++++
clang/test/OpenMP/target_map_codegen_35.cpp | 29 +---
8 files changed, 449 insertions(+), 72 deletions(-)
create mode 100644 clang/test/OpenMP/copy-gaps-1.cpp
create mode 100644 clang/test/OpenMP/copy-gaps-2.cpp
create mode 100644 clang/test/OpenMP/copy-gaps-3.cpp
create mode 100644 clang/test/OpenMP/copy-gaps-4.cpp
create mode 100644 clang/test/OpenMP/copy-gaps-5.cpp
create mode 100644 clang/test/OpenMP/copy-gaps-6.cpp
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index ce2dd4d76368a..f1698a0bec373 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -7080,6 +7080,110 @@ class MappableExprsHandler {
return ConstLength.getSExtValue() != 1;
}
+ /// A helper class to copy structures with overlapped elements, i.e. those
+ /// which have mappings of both "s" and "s.mem". Consecutive elements that
+ /// are not explicitly copied have mapping nodes synthesized for them,
+ /// taking care to avoid generating zero-sized copies.
+ class CopyOverlappedEntryGaps {
+ CodeGenFunction &CGF;
+ MapCombinedInfoTy &CombinedInfo;
+ OpenMPOffloadMappingFlags Flags = OpenMPOffloadMappingFlags::OMP_MAP_NONE;
+ const ValueDecl *MapDecl = nullptr;
+ const Expr *MapExpr = nullptr;
+ Address BP = Address::invalid();
+ bool IsNonContiguous = false;
+ uint64_t DimSize = 0;
+ // These elements track the position as the struct is iterated over
+ // (in order of increasing element address).
+ const RecordDecl *LastParent = nullptr;
+ uint64_t Cursor = 0;
+ unsigned LastIndex = -1u;
+ Address LB = Address::invalid();
+
+ public:
+ CopyOverlappedEntryGaps(CodeGenFunction &CGF,
+ MapCombinedInfoTy &CombinedInfo,
+ OpenMPOffloadMappingFlags Flags,
+ const ValueDecl *MapDecl, const Expr *MapExpr,
+ Address BP, Address LB, bool IsNonContiguous,
+ uint64_t DimSize)
+ : CGF(CGF), CombinedInfo(CombinedInfo), Flags(Flags), MapDecl(MapDecl),
+ MapExpr(MapExpr), BP(BP), LB(LB), IsNonContiguous(IsNonContiguous),
+ DimSize(DimSize) {}
+
+ void processField(
+ const OMPClauseMappableExprCommon::MappableComponent &MC,
+ const FieldDecl *FD,
+ llvm::function_ref<LValue(CodeGenFunction &, const MemberExpr *)>
+ EmitMemberExprBase) {
+ const RecordDecl *RD = FD->getParent();
+ const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(RD);
+ uint64_t FieldOffset = RL.getFieldOffset(FD->getFieldIndex());
+ uint64_t FieldSize =
+ CGF.getContext().getTypeSize(FD->getType().getCanonicalType());
+ Address ComponentLB = Address::invalid();
+
+ if (FD->getType()->isLValueReferenceType()) {
+ const auto *ME = cast<MemberExpr>(MC.getAssociatedExpression());
+ LValue BaseLVal = EmitMemberExprBase(CGF, ME);
+ ComponentLB =
+ CGF.EmitLValueForFieldInitialization(BaseLVal, FD).getAddress();
+ } else {
+ ComponentLB =
+ CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()).getAddress();
+ }
+
+ if (!LastParent)
+ LastParent = RD;
+ if (FD->getParent() == LastParent) {
+ if (FD->getFieldIndex() != LastIndex + 1)
+ copyUntilField(FD, ComponentLB);
+ } else {
+ LastParent = FD->getParent();
+ if (((int64_t)FieldOffset - (int64_t)Cursor) > 0)
+ copyUntilField(FD, ComponentLB);
+ }
+ Cursor = FieldOffset + FieldSize;
+ LastIndex = FD->getFieldIndex();
+ LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
+ }
+
+ void copyUntilField(const FieldDecl *FD, Address ComponentLB) {
+ llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
+ llvm::Value *LBPtr = LB.emitRawPointer(CGF);
+ llvm::Value *Size =
+ CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr, LBPtr);
+ copySizedChunk(LBPtr, Size);
+ }
+
+ void copyUntilEnd(Address HB) {
+ if (LastParent) {
+ const ASTRecordLayout &RL =
+ CGF.getContext().getASTRecordLayout(LastParent);
+ if ((uint64_t)CGF.getContext().toBits(RL.getSize()) <= Cursor)
+ return;
+ }
+ llvm::Value *LBPtr = LB.emitRawPointer(CGF);
+ llvm::Value *Size = CGF.Builder.CreatePtrDiff(
+ CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
+ LBPtr);
+ copySizedChunk(LBPtr, Size);
+ }
+
+ void copySizedChunk(llvm::Value *Base, llvm::Value *Size) {
+ CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
+ CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
+ CombinedInfo.DevicePtrDecls.push_back(nullptr);
+ CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
+ CombinedInfo.Pointers.push_back(Base);
+ CombinedInfo.Sizes.push_back(
+ CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
+ CombinedInfo.Types.push_back(Flags);
+ CombinedInfo.Mappers.push_back(nullptr);
+ CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize : 1);
+ }
+ };
+
/// Generate the base pointers, section pointers, sizes, map type bits, and
/// user-defined mappers (all included in \a CombinedInfo) for the provided
/// map type, map or motion modifiers, and expression components.
@@ -7570,63 +7674,22 @@ class MappableExprsHandler {
getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
/*AddPtrFlag=*/false,
/*AddIsTargetParamFlag=*/false, IsNonContiguous);
- llvm::Value *Size = nullptr;
+ CopyOverlappedEntryGaps CopyGaps(CGF, CombinedInfo, Flags, MapDecl,
+ MapExpr, BP, LB, IsNonContiguous,
+ DimSize);
// Do bitcopy of all non-overlapped structure elements.
for (OMPClauseMappableExprCommon::MappableExprComponentListRef
Component : OverlappedElements) {
- Address ComponentLB = Address::invalid();
for (const OMPClauseMappableExprCommon::MappableComponent &MC :
Component) {
if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
- const auto *FD = dyn_cast<FieldDecl>(VD);
- if (FD && FD->getType()->isLValueReferenceType()) {
- const auto *ME =
- cast<MemberExpr>(MC.getAssociatedExpression());
- LValue BaseLVal = EmitMemberExprBase(CGF, ME);
- ComponentLB =
- CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
- .getAddress();
- } else {
- ComponentLB =
- CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
- .getAddress();
+ if (const auto *FD = dyn_cast<FieldDecl>(VD)) {
+ CopyGaps.processField(MC, FD, EmitMemberExprBase);
}
- llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
- llvm::Value *LBPtr = LB.emitRawPointer(CGF);
- Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr,
- LBPtr);
- break;
}
}
- assert(Size && "Failed to determine structure size");
- CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
- CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
- CombinedInfo.DevicePtrDecls.push_back(nullptr);
- CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
- CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
- CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
- Size, CGF.Int64Ty, /*isSigned=*/true));
- CombinedInfo.Types.push_back(Flags);
- CombinedInfo.Mappers.push_back(nullptr);
- CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
- : 1);
- LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
}
- CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
- CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
- CombinedInfo.DevicePtrDecls.push_back(nullptr);
- CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
- CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
- llvm::Value *LBPtr = LB.emitRawPointer(CGF);
- Size = CGF.Builder.CreatePtrDiff(
- CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
- LBPtr);
- CombinedInfo.Sizes.push_back(
- CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
- CombinedInfo.Types.push_back(Flags);
- CombinedInfo.Mappers.push_back(nullptr);
- CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
- : 1);
+ CopyGaps.copyUntilEnd(HB);
break;
}
llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
diff --git a/clang/test/OpenMP/copy-gaps-1.cpp b/clang/test/OpenMP/copy-gaps-1.cpp
new file mode 100644
index 0000000000000..3d4fae352eed6
--- /dev/null
+++ b/clang/test/OpenMP/copy-gaps-1.cpp
@@ -0,0 +1,52 @@
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+
+struct S {
+ int x;
+ int y;
+ int z;
+ int *p1;
+ int *p2;
+};
+
+struct T : public S {
+ int a;
+ int b;
+ int c;
+};
+
+int main() {
+ T v;
+
+#pragma omp target map(tofrom: v, v.x, v.y, v.z, v.p1[:8], v.a, v.b, v.c)
+ {
+ v.x++;
+ v.y += 2;
+ v.z += 3;
+ v.p1[0] += 4;
+ v.a += 7;
+ v.b += 5;
+ v.c += 6;
+ }
+
+ return 0;
+}
+
+// CHECK: [[CSTSZ:@.+]] = private {{.*}}constant [10 x i64] [i64 0, i64 0, i64 0, i64 4, i64 4, i64 4, i64 32, i64 4, i64 4, i64 4]
+// CHECK: [[CSTTY:@.+]] = private {{.*}}constant [10 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000013]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]]
+
+// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
+// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
+// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
+// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [10 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
+
+// Check for filling of four non-constant size elements here: the whole struct
+// size, the (padded) region covering p1 & p2, and the padding at the end of
+// struct T.
+
+// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [10 x i64], ptr [[SIZES]], i32 0, i32 0
+// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
+// CHECK-DAG: [[P1P2:%.+]] = getelementptr inbounds [10 x i64], ptr [[SIZES]], i32 0, i32 1
+// CHECK-DAG: store i64 %{{.+}}, ptr [[P1P2]], align 8
+// CHECK-DAG: [[PAD:%.+]] = getelementptr inbounds [10 x i64], ptr [[SIZES]], i32 0, i32 2
+// CHECK-DAG: store i64 %{{.+}}, ptr [[PAD]], align 8
diff --git a/clang/test/OpenMP/copy-gaps-2.cpp b/clang/test/OpenMP/copy-gaps-2.cpp
new file mode 100644
index 0000000000000..5bf603a3d9edb
--- /dev/null
+++ b/clang/test/OpenMP/copy-gaps-2.cpp
@@ -0,0 +1,52 @@
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+
+struct S {
+ int x;
+ int y;
+ int z;
+};
+
+struct M : public S {
+ int mid;
+};
+
+struct T : public M {
+ int a;
+ int b;
+ int c;
+};
+
+int main() {
+ T v;
+
+#pragma omp target map(tofrom: v, v.y, v.z, v.a)
+ {
+ v.y++;
+ v.z += 2;
+ v.a += 3;
+ v.mid += 5;
+ }
+
+ return 0;
+}
+
+// CHECK: [[CSTSZ:@.+]] = private {{.*}}constant [7 x i64] [i64 0, i64 0, i64 0, i64 0, i64 4, i64 4, i64 4]
+// CHECK: [[CSTTY:@.+]] = private {{.*}}constant [7 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]]
+
+// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
+// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
+// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
+// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [7 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
+
+// Fill four non-constant size elements here: the whole struct size, the region
+// covering v.x, the region covering v.mid and the region covering v.b and v.c.
+
+// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 0
+// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
+// CHECK-DAG: [[X:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 1
+// CHECK-DAG: store i64 %{{.+}}, ptr [[X]], align 8
+// CHECK-DAG: [[MID:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 2
+// CHECK-DAG: store i64 %{{.+}}, ptr [[MID]], align 8
+// CHECK-DAG: [[BC:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 3
+// CHECK-DAG: store i64 %{{.+}}, ptr [[BC]], align 8
diff --git a/clang/test/OpenMP/copy-gaps-3.cpp b/clang/test/OpenMP/copy-gaps-3.cpp
new file mode 100644
index 0000000000000..5febb181ca1c5
--- /dev/null
+++ b/clang/test/OpenMP/copy-gaps-3.cpp
@@ -0,0 +1,46 @@
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+
+struct S {
+ int x;
+ int y;
+ int z;
+};
+
+struct T : public S {
+ int a;
+ int b;
+ int c;
+};
+
+int main() {
+ T v;
+
+ // This one should have no gap between v.z & v.a.
+#pragma omp target map(tofrom: v, v.y, v.z, v.a)
+ {
+ v.y++;
+ v.z += 2;
+ v.a += 3;
+ }
+
+ return 0;
+}
+
+// CHECK: [[CSTSZ:@.+]] = private {{.*}}constant [6 x i64] [i64 0, i64 0, i64 0, i64 4, i64 4, i64 4]
+// CHECK: [[CSTTY:@.+]] = private {{.*}}constant [6 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]]
+
+// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
+// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
+// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
+// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [6 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
+
+// Fill three non-constant size elements here: the whole struct size, the region
+// covering v.x, and the region covering v.b and v.c.
+
+// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [6 x i64], ptr [[SIZES]], i32 0, i32 0
+// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
+// CHECK-DAG: [[X:%.+]] = getelementptr inbounds [6 x i64], ptr [[SIZES]], i32 0, i32 1
+// CHECK-DAG: store i64 %{{.+}}, ptr [[X]], align 8
+// CHECK-DAG: [[BC:%.+]] = getelementptr inbounds [6 x i64], ptr [[SIZES]], i32 0, i32 2
+// CHECK-DAG: store i64 %{{.+}}, ptr [[BC]], align 8
diff --git a/clang/test/OpenMP/copy-gaps-4.cpp b/clang/test/OpenMP/copy-gaps-4.cpp
new file mode 100644
index 0000000000000..7060fe3ea2a01
--- /dev/null
+++ b/clang/test/OpenMP/copy-gaps-4.cpp
@@ -0,0 +1,48 @@
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+
+struct S {
+ int x;
+ int y;
+ char z; // Hidden padding after here...
+};
+
+struct T : public S {
+ int a;
+ int b;
+ int c;
+};
+
+int main() {
+ T v;
+
+#pragma omp target map(tofrom: v, v.y, v.z, v.a)
+ {
+ v.y++;
+ v.z += 2;
+ v.a += 3;
+ }
+
+ return 0;
+}
+
+// CHECK: [[CSTSZ:@.+]] = private {{.*}}constant [7 x i64] [i64 0, i64 0, i64 0, i64 0, i64 4, i64 1, i64 4]
+// CHECK: [[CSTTY:@.+]] = private {{.*}}constant [7 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]]
+
+// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
+// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
+// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
+// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [7 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
+
+// Fill four non-constant size elements here: the whole struct size, the region
+// covering v.x, the region covering padding after v.z and the region covering
+// v.b and v.c.
+
+// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 0
+// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
+// CHECK-DAG: [[X:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 1
+// CHECK-DAG: store i64 %{{.+}}, ptr [[X]], align 8
+// CHECK-DAG: [[PAD:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 2
+// CHECK-DAG: store i64 %{{.+}}, ptr [[PAD]], align 8
+// CHECK-DAG: [[BC:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 3
+// CHECK-DAG: store i64 %{{.+}}, ptr [[BC]], align 8
diff --git a/clang/test/OpenMP/copy-gaps-5.cpp b/clang/test/OpenMP/copy-gaps-5.cpp
new file mode 100644
index 0000000000000..fae675dc2f505
--- /dev/null
+++ b/clang/test/OpenMP/copy-gaps-5.cpp
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+
+template<typename C>
+struct S {
+ C x;
+ C y;
+ char z; // Hidden padding after here...
+};
+
+template<typename C>
+struct T : public S<C> {
+ C a;
+ C b;
+ C c;
+};
+
+int main() {
+ T<int> v;
+
+#pragma omp target map(tofrom: v, v.y, v.z, v.a)
+ {
+ v.y++;
+ v.z += 2;
+ v.a += 3;
+ }
+
+ return 0;
+}
+
+// CHECK: [[CSTSZ:@.+]] = private {{.*}}constant [7 x i64] [i64 0, i64 0, i64 0, i64 0, i64 4, i64 1, i64 4]
+// CHECK: [[CSTTY:@.+]] = private {{.*}}constant [7 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]]
+
+// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
+// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
+// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
+// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [7 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
+
+// Fill four non-constant size elements here: the whole struct size, the region
+// covering v.x, the region covering padding after v.z and the region covering
+// v.b and v.c.
+
+// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 0
+// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
+// CHECK-DAG: [[X:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 1
+// CHECK-DAG: store i64 %{{.+}}, ptr [[X]], align 8
+// CHECK-DAG: [[PAD:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 2
+// CHECK-DAG: store i64 %{{.+}}, ptr [[PAD]], align 8
+// CHECK-DAG: [[BC:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 3
+// CHECK-DAG: store i64 %{{.+}}, ptr [[BC]], align 8
diff --git a/clang/test/OpenMP/copy-gaps-6.cpp b/clang/test/OpenMP/copy-gaps-6.cpp
new file mode 100644
index 0000000000000..9c62fde1c3762
--- /dev/null
+++ b/clang/test/OpenMP/copy-gaps-6.cpp
@@ -0,0 +1,87 @@
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+
+struct S {
+ int x;
+ int *arr;
+ int y;
+ int z;
+};
+
+int main() {
+ S v;
+
+#pragma omp target map(tofrom: v, v.x, v.z)
+ {
+ v.x++;
+ v.y += 2;
+ v.z += 3;
+ }
+
+#pragma omp target map(tofrom: v, v.x, v.arr[:1])
+ {
+ v.x++;
+ v.y += 2;
+ v.arr[0] += 2;
+ v.z += 4;
+ }
+
+#pragma omp target map(tofrom: v, v.arr[:1])
+ {
+ v.x++;
+ v.y += 2;
+ v.arr[0] += 2;
+ v.z += 4;
+ }
+
+ return 0;
+}
+
+// CHECK: [[CSTSZ0:@.+]] = private {{.*}}constant [4 x i64] [i64 0, i64 0, i64 4, i64 4]
+// CHECK: [[CSTTY0:@.+]] = private {{.*}}constant [4 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]]
+
+// CHECK: [[CSTSZ1:@.+]] = private {{.*}}constant [4 x i64] [i64 0, i64 0, i64 4, i64 4]
+// CHECK: [[CSTTY1:@.+]] = private {{.*}}constant [4 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000013]]]
+
+// CHECK: [[CSTSZ2:@.+]] = private {{.*}}constant [3 x i64] [i64 0, i64 24, i64 4]
+// CHECK: [[CSTTY2:@.+]] = private {{.*}}constant [3 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000013]]]
+
+// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
+// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
+// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
+// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [4 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
+
+// Fill two non-constant size elements here: the whole struct size, and the
+// region covering v.arr and v.y.
+
+// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [4 x i64], ptr [[SIZES]], i32 0, i32 0
+// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
+// CHECK-DAG: [[ARRY:%.+]] = getelementptr inbounds [4 x i64], ptr [[SIZES]], i32 0, i32 1
+// CHECK-DAG: store i64 %{{.+}}, ptr [[ARRY]], align 8
+
+// CHECK: call void
+
+// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
+// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
+// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
+// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [4 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
+
+// Fill two non-constant size elements here: the whole struct size, and the
+// region covering v.arr, v.y and v.z.
+
+// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [4 x i64], ptr [[SIZES]], i32 0, i32 0
+// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
+// CHECK-DAG: [[ARRYZ:%.+]] = getelementptr inbounds [4 x i64], ptr [[SIZES]], i32 0, i32 1
+// CHECK-DAG: store i64 %{{.+}}, ptr [[ARRYZ]], align 8
+
+// CHECK: call void
+
+// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
+// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
+// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
+// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [3 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
+
+// Fill one non-constant size element here: the whole struct size.
+
+// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [3 x i64], ptr [[SIZES]], i32 0, i32 0
+// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
diff --git a/clang/test/OpenMP/target_map_codegen_35.cpp b/clang/test/OpenMP/target_map_codegen_35.cpp
index afa09656f70d1..c4fc49cb6e218 100644
--- a/clang/test/OpenMP/target_map_codegen_35.cpp
+++ b/clang/test/OpenMP/target_map_codegen_35.cpp
@@ -27,11 +27,11 @@ class S {
void foo();
};
-// CK35-DAG: [[SIZE_TO:@.+]] = private {{.*}}constant [4 x i64] [i64 0, i64 0, i64 0, i64 8]
+// CK35-DAG: [[SIZE_TO:@.+]] = private {{.*}}constant [3 x i64] [i64 0, i64 0, i64 8]
// TARGET_PARAM = 0x20
// MEMBER_OF_1 | TO = 0x1000000000001
// MEMBER_OF_1 | PTR_AND_OBJ | TO = 0x1000000000011
-// CK35-DAG: [[MTYPE_TO:@.+]] = {{.+}}constant [4 x i64] [i64 [[#0x20]], i64 [[#0x1000000000001]], i64 [[#0x1000000000001]], i64 [[#0x1000000000011]]]
+// CK35-DAG: [[MTYPE_TO:@.+]] = {{.+}}constant [3 x i64] [i64 [[#0x20]], i64 [[#0x1000000000001]], i64 [[#0x1000000000011]]]
// CK35-DAG: [[SIZE_FROM:@.+]] = private {{.*}}constant [2 x i64] [i64 0, i64 8]
// TARGET_PARAM = 0x20
// MEMBER_OF_1 | PTR_AND_OBJ | FROM = 0x1000000000012
@@ -86,35 +86,14 @@ void ref_map() {
// CK35-DAG: [[B_BEGIN_INTPTR]] = ptrtoint ptr [[B_BEGIN_VOID:%.+]] to i64
// CK35-DAG: [[B_ADDR:%.+]] = getelementptr inbounds nuw %class.S, ptr [[S_ADDR]], i32 0, i32 1
- // pass MEMBER_OF_1 | TO {&s, &s.b+1, ((ptr)(&s+1)-(ptr)(&s.b+1))} to copy the data of remainder of s.
+ // pass MEMBER_OF_1 | PTR_AND_OBJ | TO {&s, &s.b, 8|4} to copy the data of s.b.
// CK35-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
// CK35-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
- // CK35-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2
// CK35-DAG: store ptr [[S_ADDR]], ptr [[BP2]],
- // CK35-DAG: store ptr [[B_END:%.+]], ptr [[P2]],
- // CK35-DAG: store i64 [[REM_SIZE:%.+]], ptr [[S2]],
-
- // CK35-DAG: [[B_END]] = getelementptr ptr, ptr [[B_ADDR]], i{{.+}} 1
-
- // CK35-DAG: [[REM_SIZE]] = sdiv exact i64 [[SZ:%.+]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
- // CK35-DAG: [[SZ]] = sub i64 [[S_END_INTPTR:%.+]], [[B_END_INTPTR:%.+]]
- // CK35-DAG: [[B_END_INTPTR]] = ptrtoint ptr [[B_END_VOID:%.+]] to i64
- // CK35-DAG: [[S_END_INTPTR]] = ptrtoint ptr [[S_END_VOID:%.+]] to i64
- // CK35-DAG: [[S_END_VOID]] = getelementptr i8, ptr [[S_LAST:%.+]], i{{.+}} 1
- // CK35-64-DAG: [[S_LAST]] = getelementptr i8, ptr [[S_VOIDPTR:%.+]], i64 15
- // CK35-32-DAG: [[S_LAST]] = getelementptr i8, ptr [[S_VOIDPTR:%.+]], i32 7
-
- // pass MEMBER_OF_1 | PTR_AND_OBJ | TO {&s, &s.b, 8|4} to copy the data of s.b.
-
- // CK35-DAG: [[BP3:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 3
- // CK35-DAG: [[P3:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 3
-
-
- // CK35-DAG: store ptr [[S_ADDR]], ptr [[BP3]],
- // CK35-DAG: store ptr [[B_ADDR:%.+]], ptr [[P3]],
+ // CK35-DAG: store ptr [[B_ADDR:%.+]], ptr [[P2]],
// CK35-DAG: [[B_ADDR]] = load ptr, ptr [[B_REF:%.+]], align {{[0-9]+}}, !nonnull !{{[0-9]+}}, !align !{{[0-9]+}}
// CK35-DAG: [[B_REF]] = getelementptr inbounds nuw %class.S, ptr [[S_ADDR]], i32 0, i32 1
More information about the cfe-commits
mailing list