[llvm] [InstCombine][AMDGPU] Disable PtrReplacer when select has mismatch AS. (PR #98456)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 11 03:03:38 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: None (jofrn)
<details>
<summary>Changes</summary>
A select has two paths, and their address spaces must match if InstCombine is to apply the PointerReplacer along them. This change keeps pointer replacement enabled only if there is no addrspacecast on the paths, or if both paths have a valid addrspacecast.
---
Full diff: https://github.com/llvm/llvm-project/pull/98456.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp (+55)
- (added) llvm/test/Transforms/InstCombine/AMDGPU/addrspacecast-cmemptrreplacer.ll (+48)
``````````diff
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 4351a55ea1d30..8a31512a0fab6 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -270,16 +270,55 @@ class PointerReplacer {
unsigned ToAS = ASC->getDestAddressSpace();
return (FromAS == ToAS) || IC.isValidAddrSpaceCast(FromAS, ToAS);
}
+ bool FoundASC(const Value *TrueOp) const;
+ bool hasConflictingAS(const Instruction *I) const;
SmallPtrSet<Instruction *, 32> ValuesToRevisit;
+ SmallPtrSet<Instruction *, 32> ValuesToRevisitAS;
SmallSetVector<Instruction *, 4> Worklist;
MapVector<Value *, Value *> WorkMap;
InstCombinerImpl &IC;
Instruction &Root;
unsigned FromAS;
+ bool HasASC = false;
};
} // end anonymous namespace
+/// Return true iff a valid addrspacecast (per isEqualOrValidAddrSpaceCast) is
+/// found walking operands from \p Op towards Root; false once the walk ends.
+bool PointerReplacer::FoundASC(const Value *Op) const {
+ const Instruction *TI;
+ while ((TI = dyn_cast<Instruction>(Op)) != &Root) {
+ if (auto *ASC = dyn_cast<AddrSpaceCastInst>(Op))
+ if (isEqualOrValidAddrSpaceCast(ASC, FromAS))
+ return true;
+ if (TI && TI->getNumOperands() > 0 && isa<Instruction>(TI->getOperand(0)))
+ Op = TI->getOperand(0);
+ else if (TI && TI->getNumOperands() > 1) // Never index past the operand list.
+ Op = TI->getOperand(1);
+ else break; // Non-instruction value or no operand left to follow.
+ }
+ return false;
+}
+
+/// Return true iff a valid addrspacecast is missing on the true or the false
+/// path (as walked by FoundASC) of the select \p I, i.e. not found on both.
+bool PointerReplacer::hasConflictingAS(const Instruction *I) const {
+ auto *SI = cast<SelectInst>(I);
+ auto *TI = SI->getTrueValue();
+ auto *FI = SI->getFalseValue();
+
+ bool FoundTrueASC = FoundASC(TI);
+ bool FoundFalseASC = FoundASC(FI);
+
+ bool HasConflictingAS = !FoundFalseASC || !FoundTrueASC;
+ LLVM_DEBUG(
+ dbgs() << "HasConflictingAS: " << HasConflictingAS
+ << "{ False: " << FoundFalseASC
+ << ", True: " << FoundTrueASC << " }\n");
+ return HasConflictingAS;
+}
+
bool PointerReplacer::collectUsers() {
if (!collectUsersRecursive(Root))
return false;
@@ -290,6 +329,13 @@ bool PointerReplacer::collectUsers() {
for (auto *Inst : ValuesToRevisit)
if (!Worklist.contains(Inst))
return false;
+ // For any select found, ensure that addrspacecast is
+ // present and valid on both branches.
+ if (HasASC)
+ for (auto *Inst : ValuesToRevisitAS)
+ if (hasConflictingAS(Inst))
+ return false;
+
return true;
}
@@ -329,6 +375,14 @@ bool PointerReplacer::collectUsersRecursive(Instruction &I) {
ValuesToRevisit.insert(Inst);
continue;
}
+
+ auto *TruePtr = dyn_cast<PointerType>(SI->getTrueValue()->getType());
+ auto *FalsePtr = dyn_cast<PointerType>(SI->getFalseValue()->getType());
+ if (TruePtr && FalsePtr) {
+ if (!IC.isValidAddrSpaceCast(TruePtr->getAddressSpace(), FalsePtr->getAddressSpace()))
+ ValuesToRevisitAS.insert(SI);
+ }
+
Worklist.insert(SI);
if (!collectUsersRecursive(*SI))
return false;
@@ -342,6 +396,7 @@ bool PointerReplacer::collectUsersRecursive(Instruction &I) {
Worklist.insert(Inst);
} else if (isEqualOrValidAddrSpaceCast(Inst, FromAS)) {
Worklist.insert(Inst);
+ HasASC = true;
if (!collectUsersRecursive(*Inst))
return false;
} else if (Inst->isLifetimeStartOrEnd()) {
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/addrspacecast-cmemptrreplacer.ll b/llvm/test/Transforms/InstCombine/AMDGPU/addrspacecast-cmemptrreplacer.ll
new file mode 100644
index 0000000000000..8e7856f001ef7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/addrspacecast-cmemptrreplacer.ll
@@ -0,0 +1,48 @@
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine %s | FileCheck %s
+
+; Variant of select with an addrspacecast on only one branch on the path to the root alloca (the InstCombine pointer replacement is then disabled)
+define void @addrspacecast_true_path(ptr addrspace(4) align 8 byref([2 x i8]) %arg) {
+; CHECK-LABEL: define void @addrspacecast_true_path(ptr addrspace(4) byref([2 x i8]) align 8 %arg) {
+; CHECK-NEXT: %coerce = alloca [2 x i8], align 8, addrspace(5)
+; CHECK-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef align 8 dereferenceable(16) %coerce, ptr addrspace(4) noundef align 8 dereferenceable(16) %arg, i64 16, i1 false)
+; CHECK-NEXT: %load.coerce = load i32, ptr addrspace(5) %coerce, align 8
+; CHECK-NEXT: %cmp.i = icmp slt i32 %load.coerce, 10
+; CHECK-NEXT: %inline_values.i = getelementptr inbounds i8, ptr addrspace(5) %coerce, i32 5
+; CHECK-NEXT: %1 = addrspacecast ptr addrspace(5) %inline_values.i to ptr
+; CHECK-NEXT: %out_of_line_values.i = getelementptr inbounds i8, ptr addrspace(5) %coerce, i32 6
+; CHECK-NEXT: %2 = load ptr, ptr addrspace(5) %out_of_line_values.i, align 8
+; CHECK-NEXT: %retval.0.i = select i1 %cmp.i, ptr %1, ptr %2
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr %retval.0.i)
+; CHECK-NEXT: ret void
+; CHECK-NEXT:}
+ %coerce = alloca [2 x i8], align 8, addrspace(5)
+ call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 %coerce, ptr addrspace(4) align 8 %arg, i64 16, i1 false)
+ %load.coerce = load i32, ptr addrspace(5) %coerce, align 8
+ %cmp.i = icmp slt i32 %load.coerce, 10
+ %inline_values.i = getelementptr inbounds i8, ptr addrspace(5) %coerce, i64 5
+ %2 = addrspacecast ptr addrspace(5) %inline_values.i to ptr
+ %out_of_line_values.i = getelementptr inbounds i8, ptr addrspace(5) %coerce, i64 6
+ %3 = load ptr, ptr addrspace(5) %out_of_line_values.i, align 8
+ %retval.0.i = select i1 %cmp.i, ptr %2, ptr %3
+ call void @llvm.lifetime.end(i64 8, ptr addrspace(0) %retval.0.i)
+ ret void
+}
+
+; Variant of select with a valid addrspacecast on both branches on the path to the root alloca (the InstCombine pointer replacement remains enabled)
+define void @addrspacecast_both_paths(ptr addrspace(4) align 8 byref([2 x i8]) %arg) {
+; CHECK-LABEL: define void @addrspacecast_both_paths(ptr addrspace(4) byref([2 x i8]) align 8 %arg) {
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+ %coerce = alloca [2 x i8], align 8, addrspace(5)
+ call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 %coerce, ptr addrspace(4) align 8 %arg, i64 16, i1 false)
+ %load.coerce = load i32, ptr addrspace(5) %coerce, align 8
+ %cmp.i = icmp slt i32 %load.coerce, 10
+ %inline_values.i = getelementptr inbounds i8, ptr addrspace(5) %coerce, i64 5
+ %2 = addrspacecast ptr addrspace(5) %inline_values.i to ptr
+ %in_of_line_values.i = getelementptr inbounds i8, ptr addrspace(5) %coerce, i64 6
+ %3 = addrspacecast ptr addrspace(5) %in_of_line_values.i to ptr
+ %retval.0.i = select i1 %cmp.i, ptr %2, ptr %3
+ call void @llvm.lifetime.start(i64 8, ptr addrspace(0) %retval.0.i)
+ ret void
+}
+
``````````
</details>
https://github.com/llvm/llvm-project/pull/98456
More information about the llvm-commits
mailing list