[llvm] [mlir] [CodeExtractor] Add align metadata to extracted pointers (PR #131131)
Dominik Adamski via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 31 08:06:05 PDT 2025
https://github.com/DominikAdamski updated https://github.com/llvm/llvm-project/pull/131131
>From 802fef5f5e76dcdbaed87c1b665d74bce7c9c514 Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Tue, 11 Mar 2025 07:41:50 -0500
Subject: [PATCH 1/5] [CodeExtractor] Add align metadata to extracted pointers
Moving code to another function can lead to missed optimization opportunities,
because function passes operate on smaller chunks of code,
and they cannot figure out all details.
One example of missed optimization opportunities after code extraction
is information about pointer alignment. The instruction combine pass
adds information about pointer alignment to LLVM intrinsic memcpy calls
if it can deduce it from the code or if align metadata is added.
If this information is not present, then further optimization passes
can generate inefficient code.
If we add align metadata to extracted pointers, then the instruction
combine pass can add the align attribute to the LLVM intrinsic memcpy call
and unblock further optimization.
---
llvm/lib/Transforms/Utils/CodeExtractor.cpp | 22 +++++++++++++++++++--
1 file changed, 20 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 7277603b3ec2b..61d70a1500028 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -1604,8 +1604,26 @@ void CodeExtractor::emitFunctionBody(
Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), aggIdx);
GetElementPtrInst *GEP = GetElementPtrInst::Create(
StructArgTy, AggArg, Idx, "gep_" + inputs[i]->getName(), newFuncRoot);
- RewriteVal = new LoadInst(StructArgTy->getElementType(aggIdx), GEP,
- "loadgep_" + inputs[i]->getName(), newFuncRoot);
+ LoadInst *LoadGEP =
+ new LoadInst(StructArgTy->getElementType(aggIdx), GEP,
+ "loadgep_" + inputs[i]->getName(), newFuncRoot);
+ PointerType *ItemType =
+ dyn_cast<PointerType>(StructArgTy->getElementType(aggIdx));
+ if (ItemType && !LoadGEP->getMetadata(LLVMContext::MD_align)) {
+ unsigned AddressSpace = ItemType->getAddressSpace();
+ unsigned AlignmentValue = oldFunction->getDataLayout()
+ .getPointerPrefAlignment(AddressSpace)
+ .value();
+
+ MDBuilder MDB(header->getContext());
+ LoadGEP->setMetadata(
+ LLVMContext::MD_align,
+ MDNode::get(
+ header->getContext(),
+ MDB.createConstant(ConstantInt::get(
+ Type::getInt64Ty(header->getContext()), AlignmentValue))));
+ }
+ RewriteVal = LoadGEP;
++aggIdx;
} else
RewriteVal = &*ScalarAI++;
>From 3a41608e17f24df357e2d0131c353a182b659024 Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Fri, 14 Mar 2025 04:49:40 -0500
Subject: [PATCH 2/5] Applied remarks
---
llvm/lib/Transforms/Utils/CodeExtractor.cpp | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 61d70a1500028..9ec2ba6dc3df3 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -1609,11 +1609,11 @@ void CodeExtractor::emitFunctionBody(
"loadgep_" + inputs[i]->getName(), newFuncRoot);
PointerType *ItemType =
dyn_cast<PointerType>(StructArgTy->getElementType(aggIdx));
- if (ItemType && !LoadGEP->getMetadata(LLVMContext::MD_align)) {
+ if (ItemType) {
unsigned AddressSpace = ItemType->getAddressSpace();
- unsigned AlignmentValue = oldFunction->getDataLayout()
- .getPointerPrefAlignment(AddressSpace)
- .value();
+ // Use the same alignment as the one used for struct allocation.
+ unsigned AlignmentValue =
+ oldFunction->getDataLayout().getPrefTypeAlign(StructArgTy).value();
MDBuilder MDB(header->getContext());
LoadGEP->setMetadata(
@@ -1817,7 +1817,8 @@ CallInst *CodeExtractor::emitReplacerCall(
AllocaInst *alloca = new AllocaInst(
output->getType(), DL.getAllocaAddrSpace(), nullptr,
- output->getName() + ".loc", AllocaBlock->getFirstInsertionPt());
+ DL.getPrefTypeAlign(output->getType()), output->getName() + ".loc",
+ AllocaBlock->getFirstInsertionPt());
params.push_back(alloca);
ReloadOutputs.push_back(alloca);
}
@@ -1825,7 +1826,8 @@ CallInst *CodeExtractor::emitReplacerCall(
AllocaInst *Struct = nullptr;
if (!StructValues.empty()) {
Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr,
- "structArg", AllocaBlock->getFirstInsertionPt());
+ DL.getPrefTypeAlign(StructArgTy), "structArg",
+ AllocaBlock->getFirstInsertionPt());
if (ArgsInZeroAddressSpace && DL.getAllocaAddrSpace() != 0) {
auto *StructSpaceCast = new AddrSpaceCastInst(
Struct, PointerType ::get(Context, 0), "structArg.ascast");
>From abdb64b0c2e73a9e72a6d37c47aa4028fc1196f9 Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Mon, 31 Mar 2025 10:03:24 -0500
Subject: [PATCH 3/5] Revert "Applied remarks"
This reverts commit 3a41608e17f24df357e2d0131c353a182b659024.
---
llvm/lib/Transforms/Utils/CodeExtractor.cpp | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 9ec2ba6dc3df3..61d70a1500028 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -1609,11 +1609,11 @@ void CodeExtractor::emitFunctionBody(
"loadgep_" + inputs[i]->getName(), newFuncRoot);
PointerType *ItemType =
dyn_cast<PointerType>(StructArgTy->getElementType(aggIdx));
- if (ItemType) {
+ if (ItemType && !LoadGEP->getMetadata(LLVMContext::MD_align)) {
unsigned AddressSpace = ItemType->getAddressSpace();
- // Use the same alignment as the one used for struct allocation.
- unsigned AlignmentValue =
- oldFunction->getDataLayout().getPrefTypeAlign(StructArgTy).value();
+ unsigned AlignmentValue = oldFunction->getDataLayout()
+ .getPointerPrefAlignment(AddressSpace)
+ .value();
MDBuilder MDB(header->getContext());
LoadGEP->setMetadata(
@@ -1817,8 +1817,7 @@ CallInst *CodeExtractor::emitReplacerCall(
AllocaInst *alloca = new AllocaInst(
output->getType(), DL.getAllocaAddrSpace(), nullptr,
- DL.getPrefTypeAlign(output->getType()), output->getName() + ".loc",
- AllocaBlock->getFirstInsertionPt());
+ output->getName() + ".loc", AllocaBlock->getFirstInsertionPt());
params.push_back(alloca);
ReloadOutputs.push_back(alloca);
}
@@ -1826,8 +1825,7 @@ CallInst *CodeExtractor::emitReplacerCall(
AllocaInst *Struct = nullptr;
if (!StructValues.empty()) {
Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr,
- DL.getPrefTypeAlign(StructArgTy), "structArg",
- AllocaBlock->getFirstInsertionPt());
+ "structArg", AllocaBlock->getFirstInsertionPt());
if (ArgsInZeroAddressSpace && DL.getAllocaAddrSpace() != 0) {
auto *StructSpaceCast = new AddrSpaceCastInst(
Struct, PointerType ::get(Context, 0), "structArg.ascast");
>From bb57c17e12ac64bf5d27b0e5dda478a0adc7fb1a Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Mon, 31 Mar 2025 10:03:47 -0500
Subject: [PATCH 4/5] Revert "[CodeExtractor] Add align metadata to extracted
pointers"
This reverts commit 802fef5f5e76dcdbaed87c1b665d74bce7c9c514.
---
llvm/lib/Transforms/Utils/CodeExtractor.cpp | 22 ++-------------------
1 file changed, 2 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 61d70a1500028..7277603b3ec2b 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -1604,26 +1604,8 @@ void CodeExtractor::emitFunctionBody(
Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), aggIdx);
GetElementPtrInst *GEP = GetElementPtrInst::Create(
StructArgTy, AggArg, Idx, "gep_" + inputs[i]->getName(), newFuncRoot);
- LoadInst *LoadGEP =
- new LoadInst(StructArgTy->getElementType(aggIdx), GEP,
- "loadgep_" + inputs[i]->getName(), newFuncRoot);
- PointerType *ItemType =
- dyn_cast<PointerType>(StructArgTy->getElementType(aggIdx));
- if (ItemType && !LoadGEP->getMetadata(LLVMContext::MD_align)) {
- unsigned AddressSpace = ItemType->getAddressSpace();
- unsigned AlignmentValue = oldFunction->getDataLayout()
- .getPointerPrefAlignment(AddressSpace)
- .value();
-
- MDBuilder MDB(header->getContext());
- LoadGEP->setMetadata(
- LLVMContext::MD_align,
- MDNode::get(
- header->getContext(),
- MDB.createConstant(ConstantInt::get(
- Type::getInt64Ty(header->getContext()), AlignmentValue))));
- }
- RewriteVal = LoadGEP;
+ RewriteVal = new LoadInst(StructArgTy->getElementType(aggIdx), GEP,
+ "loadgep_" + inputs[i]->getName(), newFuncRoot);
++aggIdx;
} else
RewriteVal = &*ScalarAI++;
>From d23206f7a57a8699fb237dbfe9f765ef250c69da Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Thu, 27 Mar 2025 06:04:49 -0500
Subject: [PATCH 5/5] [OpenMP][CodeExtractor]Add align metadata to load
instructions
LLVM IR language reference manual states that align metadata
tells the optimizer that the value loaded is known to be aligned
to a boundary specified by the integer value in the metadata node.
This information is used by the optimizer, for example,
to generate more efficient memcpy calls. The LLVM Optimizer requires
align metadata to generate optimized code because information about
the alignment of objects is lost during OpenMP target code generation
(outlining of loop body helper function).
---
llvm/lib/Transforms/Utils/CodeExtractor.cpp | 40 +++++++-
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 23 ++++-
.../omptarget-memcpy-align-metadata.mlir | 92 +++++++++++++++++++
3 files changed, 150 insertions(+), 5 deletions(-)
create mode 100644 mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 216db3a4570f7..e05f5524683cb 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -248,6 +248,19 @@ buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
return Result;
}
+static bool isAlignmentPreservedForAddrCast(const Triple &TargetTriple) {
+ switch (TargetTriple.getArch()) {
+ case Triple::ArchType::amdgcn:
+ case Triple::ArchType::r600:
+ return true;
+ // TODO: Add other architectures for which we are certain that alignment
+ // is preserved during address space cast operations.
+ default:
+ return false;
+ }
+ return false;
+}
+
CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
bool AggregateArgs, BlockFrequencyInfo *BFI,
BranchProbabilityInfo *BPI, AssumptionCache *AC,
@@ -1612,8 +1625,31 @@ void CodeExtractor::emitFunctionBody(
Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), aggIdx);
GetElementPtrInst *GEP = GetElementPtrInst::Create(
StructArgTy, AggArg, Idx, "gep_" + inputs[i]->getName(), newFuncRoot);
- RewriteVal = new LoadInst(StructArgTy->getElementType(aggIdx), GEP,
- "loadgep_" + inputs[i]->getName(), newFuncRoot);
+ LoadInst *LoadGEP =
+ new LoadInst(StructArgTy->getElementType(aggIdx), GEP,
+ "loadgep_" + inputs[i]->getName(), newFuncRoot);
+ if (StructArgTy->getElementType(aggIdx)->isPointerTy()) {
+ unsigned AlignmentValue;
+ const Triple &TargetTriple =
+ newFunction->getParent()->getTargetTriple();
+ const DataLayout &DL = header->getDataLayout();
+ // Pointers without casting can provide more information about
+ // alignment. Use pointers without casts if the given target preserves
+ // alignment information for the cast operation.
+ if (isAlignmentPreservedForAddrCast(TargetTriple))
+ AlignmentValue =
+ inputs[i]->stripPointerCasts()->getPointerAlignment(DL).value();
+ else
+ AlignmentValue = inputs[i]->getPointerAlignment(DL).value();
+ MDBuilder MDB(header->getContext());
+ LoadGEP->setMetadata(
+ LLVMContext::MD_align,
+ MDNode::get(
+ header->getContext(),
+ MDB.createConstant(ConstantInt::get(
+ Type::getInt64Ty(header->getContext()), AlignmentValue))));
+ }
+ RewriteVal = LoadGEP;
++aggIdx;
} else
RewriteVal = &*ScalarAI++;
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index d41489921bd13..d4bd8839854ce 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -33,6 +33,7 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/ReplaceConstant.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/TargetParser/Triple.h"
@@ -4407,13 +4408,17 @@ createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg,
builder.restoreIP(allocaIP);
omp::VariableCaptureKind capture = omp::VariableCaptureKind::ByRef;
-
+ LLVM::TypeToLLVMIRTranslator typeToLLVMIRTranslator(
+ ompBuilder.M.getContext());
+ unsigned alignmentValue = 0;
// Find the associated MapInfoData entry for the current input
for (size_t i = 0; i < mapData.MapClause.size(); ++i)
if (mapData.OriginalValue[i] == input) {
auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
capture = mapOp.getMapCaptureType();
-
+ // Get the alignment of the mapped object
+ alignmentValue = typeToLLVMIRTranslator.getPreferredAlignment(
+ mapOp.getVarType(), ompBuilder.M.getDataLayout());
break;
}
@@ -4437,9 +4442,21 @@ createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg,
break;
}
case omp::VariableCaptureKind::ByRef: {
- retVal = builder.CreateAlignedLoad(
+ llvm::LoadInst *loadInst = builder.CreateAlignedLoad(
v->getType(), v,
ompBuilder.M.getDataLayout().getPrefTypeAlign(v->getType()));
+ // Add information about alignment of objects that are mapped by reference
+ if (v->getType()->isPointerTy() && alignmentValue) {
+ llvm::MDBuilder MDB(builder.getContext());
+ loadInst->setMetadata(
+ llvm::LLVMContext::MD_align,
+ llvm::MDNode::get(builder.getContext(),
+ MDB.createConstant(llvm::ConstantInt::get(
+ llvm::Type::getInt64Ty(builder.getContext()),
+ alignmentValue))));
+ }
+ retVal = loadInst;
+
break;
}
case omp::VariableCaptureKind::This:
diff --git a/mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir b/mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir
new file mode 100644
index 0000000000000..633df96866885
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir
@@ -0,0 +1,92 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// The aim of this test is to verify that information about
+// the alignment of loaded objects is passed to outlined
+// functions.
+
+module attributes {llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} {
+ omp.private {type = private} @_QFEk_private_i32 : i32
+ llvm.func @_QQmain() {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %7 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
+ %8 = llvm.addrspacecast %7 : !llvm.ptr<5> to !llvm.ptr
+ %12 = llvm.mlir.constant(1 : i64) : i64
+ %13 = llvm.alloca %12 x i32 {bindc_name = "k"} : (i64) -> !llvm.ptr<5>
+ %14 = llvm.addrspacecast %13 : !llvm.ptr<5> to !llvm.ptr
+ %15 = llvm.mlir.constant(1 : i64) : i64
+ %16 = llvm.alloca %15 x i32 {bindc_name = "b"} : (i64) -> !llvm.ptr<5>
+ %17 = llvm.addrspacecast %16 : !llvm.ptr<5> to !llvm.ptr
+ %19 = llvm.mlir.constant(1 : index) : i64
+ %20 = llvm.mlir.constant(0 : index) : i64
+ %22 = llvm.mlir.addressof @_QFEa : !llvm.ptr
+ %25 = llvm.mlir.addressof @_QFECnz : !llvm.ptr
+ %60 = llvm.getelementptr %8[0, 7, %20, 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %61 = llvm.load %60 : !llvm.ptr -> i64
+ %62 = llvm.getelementptr %8[0, 7, %20, 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %63 = llvm.load %62 : !llvm.ptr -> i64
+ %64 = llvm.getelementptr %8[0, 7, %20, 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %65 = llvm.load %64 : !llvm.ptr -> i64
+ %66 = llvm.sub %63, %19 : i64
+ %67 = omp.map.bounds lower_bound(%20 : i64) upper_bound(%66 : i64) extent(%63 : i64) stride(%65 : i64) start_idx(%61 : i64) {stride_in_bytes = true}
+ %68 = llvm.getelementptr %22[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %69 = omp.map.info var_ptr(%22 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%68 : !llvm.ptr) bounds(%67) -> !llvm.ptr {name = ""}
+ %70 = omp.map.info var_ptr(%22 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(to) capture(ByRef) members(%69 : [0] : !llvm.ptr) -> !llvm.ptr {name = "a"}
+ %71 = omp.map.info var_ptr(%17 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "b"}
+ %72 = omp.map.info var_ptr(%14 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "k"}
+ %73 = omp.map.info var_ptr(%25 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "nz"}
+ omp.target map_entries(%70 -> %arg0, %71 -> %arg1, %72 -> %arg2, %73 -> %arg3, %69 -> %arg4 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+ %106 = llvm.mlir.constant(0 : index) : i64
+ %107 = llvm.mlir.constant(13 : i32) : i32
+ %108 = llvm.mlir.constant(1000 : i32) : i32
+ %109 = llvm.mlir.constant(1 : i32) : i32
+ omp.teams {
+ omp.parallel private(@_QFEk_private_i32 %arg2 -> %arg5 : !llvm.ptr) {
+ %110 = llvm.mlir.constant(1 : i32) : i32
+ %111 = llvm.alloca %110 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
+ %112 = llvm.addrspacecast %111 : !llvm.ptr<5> to !llvm.ptr
+ omp.distribute {
+ omp.wsloop {
+ omp.loop_nest (%arg6) : i32 = (%109) to (%108) inclusive step (%109) {
+ llvm.store %arg6, %arg5 : i32, !llvm.ptr
+ %115 = llvm.mlir.constant(48 : i32) : i32
+ "llvm.intr.memcpy"(%112, %arg0, %115) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
+ omp.yield
+ }
+ } {omp.composite}
+ } {omp.composite}
+ omp.terminator
+ } {omp.composite}
+ omp.terminator
+ }
+ omp.terminator
+ }
+ llvm.return
+ }
+ llvm.mlir.global internal @_QFEa() {addr_space = 0 : i32} : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {
+ %6 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ llvm.return %6 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ }
+ llvm.mlir.global internal constant @_QFECnz() {addr_space = 0 : i32} : i32 {
+ %0 = llvm.mlir.constant(1000 : i32) : i32
+ llvm.return %0 : i32
+ }
+}
+
+// CHECK: call void @__kmpc_distribute_for_static_loop_4u(
+// CHECK-SAME: ptr addrspacecast (ptr addrspace(1) @[[GLOB:[0-9]+]] to ptr),
+// CHECK-SAME: ptr @[[LOOP_BODY_FUNC:.*]], ptr %[[LOOP_BODY_FUNC_ARG:.*]],
+// CHEKC-SAME i32 1000, i32 %1, i32 0, i32 0)
+
+
+// CHECK: define internal void @[[LOOP_BODY_FUNC]](i32 %[[CNT:.*]], ptr %[[LOOP_BODY_ARG_PTR:.*]]) #[[ATTRS:[0-9]+]] {
+// CHECK: %[[GEP_PTR_0:.*]] = getelementptr { ptr, ptr, ptr }, ptr %[[LOOP_BODY_ARG_PTR]], i32 0, i32 0
+// CHECK: %[[INT_PTR:.*]] = load ptr, ptr %[[GEP_PTR_0]], align 8, !align ![[ALIGN_INT:[0-9]+]]
+// CHECK: %[[GEP_PTR_1:.*]] = getelementptr { ptr, ptr, ptr }, ptr %[[LOOP_BODY_ARG_PTR]], i32 0, i32 1
+// CHECK: %[[STRUCT_PTR_0:.*]] = load ptr, ptr %[[GEP_PTR_1]], align 8, !align ![[ALIGN_STRUCT:[0-9]+]]
+// CHECK: %[[GEP_PTR_2:.*]] = getelementptr { ptr, ptr, ptr }, ptr %[[LOOP_BODY_ARG_PTR]], i32 0, i32 2
+// CHECK: %[[STRUCT_PTR_1:.*]] = load ptr, ptr %[[GEP_PTR_2]], align 8, !align ![[ALIGN_STRUCT:[0-9]+]]
+// CHECK: store i32 %[[DATA_INT:.*]], ptr %[[INT_PTR]], align 4
+// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[STRUCT_PTR_0]], ptr %[[STRUCT_PTR_1]], i32 48, i1 false)
+
+// CHECK: ![[ALIGN_STRUCT]] = !{i64 8}
+// CHECK: ![[ALIGN_INT]] = !{i64 4}
More information about the llvm-commits
mailing list