[llvm] [mlir] [OpenMP][CodeExtractor]Add align metadata to load instructions (PR #131131)
Dominik Adamski via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 8 05:22:17 PDT 2025
https://github.com/DominikAdamski updated https://github.com/llvm/llvm-project/pull/131131
>From 802fef5f5e76dcdbaed87c1b665d74bce7c9c514 Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Tue, 11 Mar 2025 07:41:50 -0500
Subject: [PATCH 1/6] [CodeExtractor] Add align metadata to extracted pointers
Moving code to another function can lead to missed optimization opportunities,
because function passes operate on smaller chunks of code,
and they cannot figure out all details.
One example of missed optimization opportunities after code extraction
is information about pointer alignment. The instruction combine pass
adds information about pointer alignment to LLVM intrinsic memcpy calls
if it can deduce it from the code or if align metadata is added.
If this information is not present, then further optimization passes
can generate inefficient code.
If we add align metadata to extracted pointers, then the instruction
combine pass can add the align attribute to the LLVM intrinsic memcpy call
and unblock further optimization.
---
llvm/lib/Transforms/Utils/CodeExtractor.cpp | 22 +++++++++++++++++++--
1 file changed, 20 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 7277603b3ec2b..61d70a1500028 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -1604,8 +1604,26 @@ void CodeExtractor::emitFunctionBody(
Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), aggIdx);
GetElementPtrInst *GEP = GetElementPtrInst::Create(
StructArgTy, AggArg, Idx, "gep_" + inputs[i]->getName(), newFuncRoot);
- RewriteVal = new LoadInst(StructArgTy->getElementType(aggIdx), GEP,
- "loadgep_" + inputs[i]->getName(), newFuncRoot);
+ LoadInst *LoadGEP =
+ new LoadInst(StructArgTy->getElementType(aggIdx), GEP,
+ "loadgep_" + inputs[i]->getName(), newFuncRoot);
+ PointerType *ItemType =
+ dyn_cast<PointerType>(StructArgTy->getElementType(aggIdx));
+ if (ItemType && !LoadGEP->getMetadata(LLVMContext::MD_align)) {
+ unsigned AddressSpace = ItemType->getAddressSpace();
+ unsigned AlignmentValue = oldFunction->getDataLayout()
+ .getPointerPrefAlignment(AddressSpace)
+ .value();
+
+ MDBuilder MDB(header->getContext());
+ LoadGEP->setMetadata(
+ LLVMContext::MD_align,
+ MDNode::get(
+ header->getContext(),
+ MDB.createConstant(ConstantInt::get(
+ Type::getInt64Ty(header->getContext()), AlignmentValue))));
+ }
+ RewriteVal = LoadGEP;
++aggIdx;
} else
RewriteVal = &*ScalarAI++;
>From 3a41608e17f24df357e2d0131c353a182b659024 Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Fri, 14 Mar 2025 04:49:40 -0500
Subject: [PATCH 2/6] Applied remarks
---
llvm/lib/Transforms/Utils/CodeExtractor.cpp | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 61d70a1500028..9ec2ba6dc3df3 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -1609,11 +1609,11 @@ void CodeExtractor::emitFunctionBody(
"loadgep_" + inputs[i]->getName(), newFuncRoot);
PointerType *ItemType =
dyn_cast<PointerType>(StructArgTy->getElementType(aggIdx));
- if (ItemType && !LoadGEP->getMetadata(LLVMContext::MD_align)) {
+ if (ItemType) {
unsigned AddressSpace = ItemType->getAddressSpace();
- unsigned AlignmentValue = oldFunction->getDataLayout()
- .getPointerPrefAlignment(AddressSpace)
- .value();
+ // Use the same alignment as the one used for struct allocation.
+ unsigned AlignmentValue =
+ oldFunction->getDataLayout().getPrefTypeAlign(StructArgTy).value();
MDBuilder MDB(header->getContext());
LoadGEP->setMetadata(
@@ -1817,7 +1817,8 @@ CallInst *CodeExtractor::emitReplacerCall(
AllocaInst *alloca = new AllocaInst(
output->getType(), DL.getAllocaAddrSpace(), nullptr,
- output->getName() + ".loc", AllocaBlock->getFirstInsertionPt());
+ DL.getPrefTypeAlign(output->getType()), output->getName() + ".loc",
+ AllocaBlock->getFirstInsertionPt());
params.push_back(alloca);
ReloadOutputs.push_back(alloca);
}
@@ -1825,7 +1826,8 @@ CallInst *CodeExtractor::emitReplacerCall(
AllocaInst *Struct = nullptr;
if (!StructValues.empty()) {
Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr,
- "structArg", AllocaBlock->getFirstInsertionPt());
+ DL.getPrefTypeAlign(StructArgTy), "structArg",
+ AllocaBlock->getFirstInsertionPt());
if (ArgsInZeroAddressSpace && DL.getAllocaAddrSpace() != 0) {
auto *StructSpaceCast = new AddrSpaceCastInst(
Struct, PointerType ::get(Context, 0), "structArg.ascast");
>From abdb64b0c2e73a9e72a6d37c47aa4028fc1196f9 Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Mon, 31 Mar 2025 10:03:24 -0500
Subject: [PATCH 3/6] Revert "Applied remarks"
This reverts commit 3a41608e17f24df357e2d0131c353a182b659024.
---
llvm/lib/Transforms/Utils/CodeExtractor.cpp | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 9ec2ba6dc3df3..61d70a1500028 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -1609,11 +1609,11 @@ void CodeExtractor::emitFunctionBody(
"loadgep_" + inputs[i]->getName(), newFuncRoot);
PointerType *ItemType =
dyn_cast<PointerType>(StructArgTy->getElementType(aggIdx));
- if (ItemType) {
+ if (ItemType && !LoadGEP->getMetadata(LLVMContext::MD_align)) {
unsigned AddressSpace = ItemType->getAddressSpace();
- // Use the same alignment as the one used for struct allocation.
- unsigned AlignmentValue =
- oldFunction->getDataLayout().getPrefTypeAlign(StructArgTy).value();
+ unsigned AlignmentValue = oldFunction->getDataLayout()
+ .getPointerPrefAlignment(AddressSpace)
+ .value();
MDBuilder MDB(header->getContext());
LoadGEP->setMetadata(
@@ -1817,8 +1817,7 @@ CallInst *CodeExtractor::emitReplacerCall(
AllocaInst *alloca = new AllocaInst(
output->getType(), DL.getAllocaAddrSpace(), nullptr,
- DL.getPrefTypeAlign(output->getType()), output->getName() + ".loc",
- AllocaBlock->getFirstInsertionPt());
+ output->getName() + ".loc", AllocaBlock->getFirstInsertionPt());
params.push_back(alloca);
ReloadOutputs.push_back(alloca);
}
@@ -1826,8 +1825,7 @@ CallInst *CodeExtractor::emitReplacerCall(
AllocaInst *Struct = nullptr;
if (!StructValues.empty()) {
Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr,
- DL.getPrefTypeAlign(StructArgTy), "structArg",
- AllocaBlock->getFirstInsertionPt());
+ "structArg", AllocaBlock->getFirstInsertionPt());
if (ArgsInZeroAddressSpace && DL.getAllocaAddrSpace() != 0) {
auto *StructSpaceCast = new AddrSpaceCastInst(
Struct, PointerType ::get(Context, 0), "structArg.ascast");
>From bb57c17e12ac64bf5d27b0e5dda478a0adc7fb1a Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Mon, 31 Mar 2025 10:03:47 -0500
Subject: [PATCH 4/6] Revert "[CodeExtractor] Add align metadata to extracted
pointers"
This reverts commit 802fef5f5e76dcdbaed87c1b665d74bce7c9c514.
---
llvm/lib/Transforms/Utils/CodeExtractor.cpp | 22 ++-------------------
1 file changed, 2 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 61d70a1500028..7277603b3ec2b 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -1604,26 +1604,8 @@ void CodeExtractor::emitFunctionBody(
Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), aggIdx);
GetElementPtrInst *GEP = GetElementPtrInst::Create(
StructArgTy, AggArg, Idx, "gep_" + inputs[i]->getName(), newFuncRoot);
- LoadInst *LoadGEP =
- new LoadInst(StructArgTy->getElementType(aggIdx), GEP,
- "loadgep_" + inputs[i]->getName(), newFuncRoot);
- PointerType *ItemType =
- dyn_cast<PointerType>(StructArgTy->getElementType(aggIdx));
- if (ItemType && !LoadGEP->getMetadata(LLVMContext::MD_align)) {
- unsigned AddressSpace = ItemType->getAddressSpace();
- unsigned AlignmentValue = oldFunction->getDataLayout()
- .getPointerPrefAlignment(AddressSpace)
- .value();
-
- MDBuilder MDB(header->getContext());
- LoadGEP->setMetadata(
- LLVMContext::MD_align,
- MDNode::get(
- header->getContext(),
- MDB.createConstant(ConstantInt::get(
- Type::getInt64Ty(header->getContext()), AlignmentValue))));
- }
- RewriteVal = LoadGEP;
+ RewriteVal = new LoadInst(StructArgTy->getElementType(aggIdx), GEP,
+ "loadgep_" + inputs[i]->getName(), newFuncRoot);
++aggIdx;
} else
RewriteVal = &*ScalarAI++;
>From d23206f7a57a8699fb237dbfe9f765ef250c69da Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Thu, 27 Mar 2025 06:04:49 -0500
Subject: [PATCH 5/6] [OpenMP][CodeExtractor]Add align metadata to load
instructions
The LLVM IR language reference manual states that align metadata
tells the optimizer that the value loaded is known to be aligned
to a boundary specified by the integer value in the metadata node.
This information is used by the optimizer, for example,
to generate more efficient memcpy calls. The LLVM Optimizer requires
align metadata to generate optimized code because information about
the alignment of objects is lost during OpenMP target code generation
(outlining of loop body helper function).
---
llvm/lib/Transforms/Utils/CodeExtractor.cpp | 40 +++++++-
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 23 ++++-
.../omptarget-memcpy-align-metadata.mlir | 92 +++++++++++++++++++
3 files changed, 150 insertions(+), 5 deletions(-)
create mode 100644 mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 216db3a4570f7..e05f5524683cb 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -248,6 +248,19 @@ buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
return Result;
}
+static bool isAlignmentPreservedForAddrCast(const Triple &TargetTriple) {
+ switch (TargetTriple.getArch()) {
+ case Triple::ArchType::amdgcn:
+ case Triple::ArchType::r600:
+ return true;
+ // TODO: Add other architectures for which we are certain that alignment
+ // is preserved during address space cast operations.
+ default:
+ return false;
+ }
+ return false;
+}
+
CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
bool AggregateArgs, BlockFrequencyInfo *BFI,
BranchProbabilityInfo *BPI, AssumptionCache *AC,
@@ -1612,8 +1625,31 @@ void CodeExtractor::emitFunctionBody(
Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), aggIdx);
GetElementPtrInst *GEP = GetElementPtrInst::Create(
StructArgTy, AggArg, Idx, "gep_" + inputs[i]->getName(), newFuncRoot);
- RewriteVal = new LoadInst(StructArgTy->getElementType(aggIdx), GEP,
- "loadgep_" + inputs[i]->getName(), newFuncRoot);
+ LoadInst *LoadGEP =
+ new LoadInst(StructArgTy->getElementType(aggIdx), GEP,
+ "loadgep_" + inputs[i]->getName(), newFuncRoot);
+ if (StructArgTy->getElementType(aggIdx)->isPointerTy()) {
+ unsigned AlignmentValue;
+ const Triple &TargetTriple =
+ newFunction->getParent()->getTargetTriple();
+ const DataLayout &DL = header->getDataLayout();
+ // Pointers without casting can provide more information about
+ // alignment. Use pointers without casts if the given target preserves
+ // alignment information across the cast operation.
+ if (isAlignmentPreservedForAddrCast(TargetTriple))
+ AlignmentValue =
+ inputs[i]->stripPointerCasts()->getPointerAlignment(DL).value();
+ else
+ AlignmentValue = inputs[i]->getPointerAlignment(DL).value();
+ MDBuilder MDB(header->getContext());
+ LoadGEP->setMetadata(
+ LLVMContext::MD_align,
+ MDNode::get(
+ header->getContext(),
+ MDB.createConstant(ConstantInt::get(
+ Type::getInt64Ty(header->getContext()), AlignmentValue))));
+ }
+ RewriteVal = LoadGEP;
++aggIdx;
} else
RewriteVal = &*ScalarAI++;
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index d41489921bd13..d4bd8839854ce 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -33,6 +33,7 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/ReplaceConstant.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/TargetParser/Triple.h"
@@ -4407,13 +4408,17 @@ createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg,
builder.restoreIP(allocaIP);
omp::VariableCaptureKind capture = omp::VariableCaptureKind::ByRef;
-
+ LLVM::TypeToLLVMIRTranslator typeToLLVMIRTranslator(
+ ompBuilder.M.getContext());
+ unsigned alignmentValue = 0;
// Find the associated MapInfoData entry for the current input
for (size_t i = 0; i < mapData.MapClause.size(); ++i)
if (mapData.OriginalValue[i] == input) {
auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
capture = mapOp.getMapCaptureType();
-
+ // Get the preferred alignment of the mapped object's type.
+ alignmentValue = typeToLLVMIRTranslator.getPreferredAlignment(
+ mapOp.getVarType(), ompBuilder.M.getDataLayout());
break;
}
@@ -4437,9 +4442,21 @@ createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg,
break;
}
case omp::VariableCaptureKind::ByRef: {
- retVal = builder.CreateAlignedLoad(
+ llvm::LoadInst *loadInst = builder.CreateAlignedLoad(
v->getType(), v,
ompBuilder.M.getDataLayout().getPrefTypeAlign(v->getType()));
+ // Add information about alignment of objects that are mapped by reference
+ if (v->getType()->isPointerTy() && alignmentValue) {
+ llvm::MDBuilder MDB(builder.getContext());
+ loadInst->setMetadata(
+ llvm::LLVMContext::MD_align,
+ llvm::MDNode::get(builder.getContext(),
+ MDB.createConstant(llvm::ConstantInt::get(
+ llvm::Type::getInt64Ty(builder.getContext()),
+ alignmentValue))));
+ }
+ retVal = loadInst;
+
break;
}
case omp::VariableCaptureKind::This:
diff --git a/mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir b/mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir
new file mode 100644
index 0000000000000..633df96866885
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir
@@ -0,0 +1,92 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// The aim of this test is to verify that information about the
+// alignment of loaded objects is passed to outlined
+// functions.
+
+module attributes {llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} {
+ omp.private {type = private} @_QFEk_private_i32 : i32
+ llvm.func @_QQmain() {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %7 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
+ %8 = llvm.addrspacecast %7 : !llvm.ptr<5> to !llvm.ptr
+ %12 = llvm.mlir.constant(1 : i64) : i64
+ %13 = llvm.alloca %12 x i32 {bindc_name = "k"} : (i64) -> !llvm.ptr<5>
+ %14 = llvm.addrspacecast %13 : !llvm.ptr<5> to !llvm.ptr
+ %15 = llvm.mlir.constant(1 : i64) : i64
+ %16 = llvm.alloca %15 x i32 {bindc_name = "b"} : (i64) -> !llvm.ptr<5>
+ %17 = llvm.addrspacecast %16 : !llvm.ptr<5> to !llvm.ptr
+ %19 = llvm.mlir.constant(1 : index) : i64
+ %20 = llvm.mlir.constant(0 : index) : i64
+ %22 = llvm.mlir.addressof @_QFEa : !llvm.ptr
+ %25 = llvm.mlir.addressof @_QFECnz : !llvm.ptr
+ %60 = llvm.getelementptr %8[0, 7, %20, 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %61 = llvm.load %60 : !llvm.ptr -> i64
+ %62 = llvm.getelementptr %8[0, 7, %20, 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %63 = llvm.load %62 : !llvm.ptr -> i64
+ %64 = llvm.getelementptr %8[0, 7, %20, 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %65 = llvm.load %64 : !llvm.ptr -> i64
+ %66 = llvm.sub %63, %19 : i64
+ %67 = omp.map.bounds lower_bound(%20 : i64) upper_bound(%66 : i64) extent(%63 : i64) stride(%65 : i64) start_idx(%61 : i64) {stride_in_bytes = true}
+ %68 = llvm.getelementptr %22[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %69 = omp.map.info var_ptr(%22 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%68 : !llvm.ptr) bounds(%67) -> !llvm.ptr {name = ""}
+ %70 = omp.map.info var_ptr(%22 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(to) capture(ByRef) members(%69 : [0] : !llvm.ptr) -> !llvm.ptr {name = "a"}
+ %71 = omp.map.info var_ptr(%17 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "b"}
+ %72 = omp.map.info var_ptr(%14 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "k"}
+ %73 = omp.map.info var_ptr(%25 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "nz"}
+ omp.target map_entries(%70 -> %arg0, %71 -> %arg1, %72 -> %arg2, %73 -> %arg3, %69 -> %arg4 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+ %106 = llvm.mlir.constant(0 : index) : i64
+ %107 = llvm.mlir.constant(13 : i32) : i32
+ %108 = llvm.mlir.constant(1000 : i32) : i32
+ %109 = llvm.mlir.constant(1 : i32) : i32
+ omp.teams {
+ omp.parallel private(@_QFEk_private_i32 %arg2 -> %arg5 : !llvm.ptr) {
+ %110 = llvm.mlir.constant(1 : i32) : i32
+ %111 = llvm.alloca %110 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
+ %112 = llvm.addrspacecast %111 : !llvm.ptr<5> to !llvm.ptr
+ omp.distribute {
+ omp.wsloop {
+ omp.loop_nest (%arg6) : i32 = (%109) to (%108) inclusive step (%109) {
+ llvm.store %arg6, %arg5 : i32, !llvm.ptr
+ %115 = llvm.mlir.constant(48 : i32) : i32
+ "llvm.intr.memcpy"(%112, %arg0, %115) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
+ omp.yield
+ }
+ } {omp.composite}
+ } {omp.composite}
+ omp.terminator
+ } {omp.composite}
+ omp.terminator
+ }
+ omp.terminator
+ }
+ llvm.return
+ }
+ llvm.mlir.global internal @_QFEa() {addr_space = 0 : i32} : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {
+ %6 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ llvm.return %6 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ }
+ llvm.mlir.global internal constant @_QFECnz() {addr_space = 0 : i32} : i32 {
+ %0 = llvm.mlir.constant(1000 : i32) : i32
+ llvm.return %0 : i32
+ }
+}
+
+// CHECK: call void @__kmpc_distribute_for_static_loop_4u(
+// CHECK-SAME: ptr addrspacecast (ptr addrspace(1) @[[GLOB:[0-9]+]] to ptr),
+// CHECK-SAME: ptr @[[LOOP_BODY_FUNC:.*]], ptr %[[LOOP_BODY_FUNC_ARG:.*]],
+// CHECK-SAME: i32 1000, i32 %{{.*}}, i32 0, i32 0)
+
+
+// CHECK: define internal void @[[LOOP_BODY_FUNC]](i32 %[[CNT:.*]], ptr %[[LOOP_BODY_ARG_PTR:.*]]) #[[ATTRS:[0-9]+]] {
+// CHECK: %[[GEP_PTR_0:.*]] = getelementptr { ptr, ptr, ptr }, ptr %[[LOOP_BODY_ARG_PTR]], i32 0, i32 0
+// CHECK: %[[INT_PTR:.*]] = load ptr, ptr %[[GEP_PTR_0]], align 8, !align ![[ALIGN_INT:[0-9]+]]
+// CHECK: %[[GEP_PTR_1:.*]] = getelementptr { ptr, ptr, ptr }, ptr %[[LOOP_BODY_ARG_PTR]], i32 0, i32 1
+// CHECK: %[[STRUCT_PTR_0:.*]] = load ptr, ptr %[[GEP_PTR_1]], align 8, !align ![[ALIGN_STRUCT:[0-9]+]]
+// CHECK: %[[GEP_PTR_2:.*]] = getelementptr { ptr, ptr, ptr }, ptr %[[LOOP_BODY_ARG_PTR]], i32 0, i32 2
+// CHECK: %[[STRUCT_PTR_1:.*]] = load ptr, ptr %[[GEP_PTR_2]], align 8, !align ![[ALIGN_STRUCT:[0-9]+]]
+// CHECK: store i32 %[[DATA_INT:.*]], ptr %[[INT_PTR]], align 4
+// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[STRUCT_PTR_0]], ptr %[[STRUCT_PTR_1]], i32 48, i1 false)
+
+// CHECK: ![[ALIGN_STRUCT]] = !{i64 8}
+// CHECK: ![[ALIGN_INT]] = !{i64 4}
>From 7e737d1c73b057e1f3ca8df278eba07de7bc14aa Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Tue, 8 Apr 2025 07:13:27 -0500
Subject: [PATCH 6/6] Applied remarks
---
llvm/lib/Transforms/Utils/CodeExtractor.cpp | 13 +++++++++++++
.../Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 15 ++++++++++++++-
2 files changed, 27 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index e05f5524683cb..2b055020022be 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -248,6 +248,8 @@ buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
return Result;
}
+/// isAlignmentPreservedForAddrCast - Return true if the address space cast
+/// operation for the specified target preserves the original alignment.
static bool isAlignmentPreservedForAddrCast(const Triple &TargetTriple) {
switch (TargetTriple.getArch()) {
case Triple::ArchType::amdgcn:
@@ -1628,6 +1630,17 @@ void CodeExtractor::emitFunctionBody(
LoadInst *LoadGEP =
new LoadInst(StructArgTy->getElementType(aggIdx), GEP,
"loadgep_" + inputs[i]->getName(), newFuncRoot);
+ // If we load a pointer, we can add the optional !align metadata.
+ // The existence of the !align metadata on the instruction tells
+ // the optimizer that the value loaded is known to be aligned to
+ // a boundary specified by the integer value in the metadata node.
+ // Example:
+ // %res = load ptr, ptr %input, align 8, !align !align_md_node
+ // ^ ^
+ // | |
+ // alignment of %input address |
+ // |
+ // alignment of %res object
if (StructArgTy->getElementType(aggIdx)->isPointerTy()) {
unsigned AlignmentValue;
const Triple &TargetTriple =
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index d4bd8839854ce..f59656f65c9df 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -4445,7 +4445,20 @@ createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg,
llvm::LoadInst *loadInst = builder.CreateAlignedLoad(
v->getType(), v,
ompBuilder.M.getDataLayout().getPrefTypeAlign(v->getType()));
- // Add information about alignment of objects that are mapped by reference
+ // The CreateAlignedLoad function creates LLVM IR similar to:
+ // %res = load ptr, ptr %input, align 8
+ // This LLVM IR does not contain information about the alignment
+ // of the loaded value. We need to add !align metadata to unblock
+ // the optimizer. The existence of the !align metadata on the instruction
+ // tells the optimizer that the value loaded is known to be aligned to
+ // a boundary specified by the integer value in the metadata node.
+ // Example:
+ // %res = load ptr, ptr %input, align 8, !align !align_md_node
+ // ^ ^
+ // | |
+ // alignment of %input address |
+ // |
+ // alignment of %res object
if (v->getType()->isPointerTy() && alignmentValue) {
llvm::MDBuilder MDB(builder.getContext());
loadInst->setMetadata(
More information about the llvm-commits
mailing list