[llvm] [InstCombine][AMDGPU] Disable PtrReplacer when select has mismatch AS. (PR #98456)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 11 13:25:10 PDT 2024
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/98456
>From 1fc315a64bb7e10981d88bcba5f519f1ea865b9f Mon Sep 17 00:00:00 2001
From: jofrn <jofernau at amd.com>
Date: Thu, 11 Jul 2024 05:38:07 -0400
Subject: [PATCH] [InstCombine][AMDGPU] Disable PtrReplacer when select has
mismatch AS.
A select has two operand paths, and their address spaces must match if
InstCombine is to apply PtrReplacer along them. Keep pointer replacement
enabled only if there is no addrspacecast on the path, or if both paths
contain a valid addrspacecast.
---
.../InstCombineLoadStoreAlloca.cpp | 48 +++++++++++++++++++
.../AMDGPU/addrspacecast-cmemptrreplacer.ll | 48 +++++++++++++++++++
2 files changed, 96 insertions(+)
create mode 100644 llvm/test/Transforms/InstCombine/AMDGPU/addrspacecast-cmemptrreplacer.ll
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 21d5e1dece024..5269df77e88ae 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -270,16 +270,54 @@ class PointerReplacer {
unsigned ToAS = ASC->getDestAddressSpace();
return (FromAS == ToAS) || IC.isValidAddrSpaceCast(FromAS, ToAS);
}
+ bool foundASC(const Value *Op) const;
+ bool hasConflictingAS(const SelectInst *I) const;
SmallPtrSet<Instruction *, 32> ValuesToRevisit;
+ SmallPtrSet<Instruction *, 32> ValuesToRevisitAS;
SmallSetVector<Instruction *, 4> Worklist;
MapVector<Value *, Value *> WorkMap;
InstCombinerImpl &IC;
Instruction &Root;
unsigned FromAS;
+ bool HasASC = false;
};
} // end anonymous namespace
+/// Walk operand 0 (or operand 1 if operand 0 is not an instruction) from \p Op
+/// towards Root; return true iff a valid addrspacecast is met on the way.
+/// Operand counts are checked before each read so the walk ends cleanly.
+bool PointerReplacer::foundASC(const Value *Op) const {
+  const Instruction *TI;
+  while ((TI = dyn_cast<Instruction>(Op)) != &Root) {
+    if (!TI || TI->getNumOperands() == 0)
+      break; // Not an instruction, or no operand left to follow.
+    if (auto *ASC = dyn_cast<AddrSpaceCastInst>(TI))
+      if (isEqualOrValidAddrSpaceCast(ASC, FromAS))
+        return true;
+    if (!isa<Instruction>(TI->getOperand(0)) && TI->getNumOperands() < 2)
+      break; // No second operand exists; reading it would be out of range.
+    Op = TI->getOperand(isa<Instruction>(TI->getOperand(0)) ? 0 : 1);
+  }
+  return false;
+}
+
+/// Return true unless foundASC() succeeds for both the true and the false
+/// operand of select \p I, i.e. when at least one operand path has no valid
+/// addrspacecast. A select with no cast on either path also returns true.
+bool PointerReplacer::hasConflictingAS(const SelectInst *I) const {
+  const bool FoundTrueASC = foundASC(I->getTrueValue());
+  const bool FoundFalseASC = foundASC(I->getFalseValue());
+
+  // Conflict means "not both found" (De Morgan of the two negated flags).
+  const bool HasConflictingAS = !(FoundFalseASC && FoundTrueASC);
+
+  LLVM_DEBUG(dbgs() << "HasConflictingAS: " << HasConflictingAS << "{ False: "
+                    << FoundFalseASC << ", True: " << FoundTrueASC << " }\n");
+  return HasConflictingAS;
+}
+
+
bool PointerReplacer::collectUsers() {
if (!collectUsersRecursive(Root))
return false;
@@ -290,6 +328,13 @@ bool PointerReplacer::collectUsers() {
for (auto *Inst : ValuesToRevisit)
if (!Worklist.contains(Inst))
return false;
+ // For any select found, ensure that addrspacecast is
+ // present and valid on both branches.
+ if (HasASC)
+ for (auto *Inst : ValuesToRevisitAS)
+ if (hasConflictingAS(Inst))
+ return false;
+
return true;
}
@@ -329,6 +374,8 @@ bool PointerReplacer::collectUsersRecursive(Instruction &I) {
ValuesToRevisit.insert(Inst);
continue;
}
+
+ ValuesToRevisitAS.insert(SI);
Worklist.insert(SI);
if (!collectUsersRecursive(*SI))
return false;
@@ -342,6 +389,7 @@ bool PointerReplacer::collectUsersRecursive(Instruction &I) {
Worklist.insert(Inst);
} else if (isEqualOrValidAddrSpaceCast(Inst, FromAS)) {
Worklist.insert(Inst);
+ HasASC = true;
if (!collectUsersRecursive(*Inst))
return false;
} else if (Inst->isLifetimeStartOrEnd()) {
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/addrspacecast-cmemptrreplacer.ll b/llvm/test/Transforms/InstCombine/AMDGPU/addrspacecast-cmemptrreplacer.ll
new file mode 100644
index 0000000000000..d853af4b13a80
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/addrspacecast-cmemptrreplacer.ll
@@ -0,0 +1,48 @@
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine %s | FileCheck %s
+
+; The select's true operand is an addrspacecast of a GEP on the alloca, while its false operand is a loaded pointer with no addrspacecast; the CHECK lines expect the alloca and memcpy to be kept.
+define void @addrspacecast_true_path(ptr addrspace(4) align 8 byref([2 x i8]) %arg) {
+; CHECK-LABEL: define void @addrspacecast_true_path(ptr addrspace(4) byref([2 x i8]) align 8 %arg) {
+; CHECK-NEXT: %coerce = alloca [2 x i8], align 8, addrspace(5)
+; CHECK-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef align 8 dereferenceable(16) %coerce, ptr addrspace(4) noundef align 8 dereferenceable(16) %arg, i64 16, i1 false)
+; CHECK-NEXT: %load.coerce = load i32, ptr addrspace(5) %coerce, align 8
+; CHECK-NEXT: %cmp.i = icmp slt i32 %load.coerce, 10
+; CHECK-NEXT: %inline_values.i = getelementptr inbounds i8, ptr addrspace(5) %coerce, i32 5
+; CHECK-NEXT: %1 = addrspacecast ptr addrspace(5) %inline_values.i to ptr
+; CHECK-NEXT: %out_of_line_values.i = getelementptr inbounds i8, ptr addrspace(5) %coerce, i32 6
+; CHECK-NEXT: %2 = load ptr, ptr addrspace(5) %out_of_line_values.i, align 8
+; CHECK-NEXT: %retval.0.i = select i1 %cmp.i, ptr %1, ptr %2
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr %retval.0.i)
+; CHECK-NEXT: ret void
+; CHECK-NEXT:}
+ %coerce = alloca [2 x i8], align 8, addrspace(5)
+ call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 %coerce, ptr addrspace(4) align 8 %arg, i64 16, i1 false)
+ %load.coerce = load i32, ptr addrspace(5) %coerce, align 8
+ %cmp.i = icmp slt i32 %load.coerce, 10
+ %inline_values.i = getelementptr inbounds i8, ptr addrspace(5) %coerce, i64 5
+ %ret.1 = addrspacecast ptr addrspace(5) %inline_values.i to ptr
+ %out_of_line_values.i = getelementptr inbounds i8, ptr addrspace(5) %coerce, i64 6
+ %ret.0 = load ptr, ptr addrspace(5) %out_of_line_values.i, align 8
+ %retval.0.i = select i1 %cmp.i, ptr %ret.1, ptr %ret.0
+ call void @llvm.lifetime.end(i64 8, ptr addrspace(0) %retval.0.i)
+ ret void
+}
+
+; Both select operands are addrspacecasts of GEPs on the alloca; the CHECK lines expect the function body to reduce to a bare `ret void`.
+define void @addrspacecast_both_paths(ptr addrspace(4) align 8 byref([2 x i8]) %arg) {
+; CHECK-LABEL: define void @addrspacecast_both_paths(ptr addrspace(4) byref([2 x i8]) align 8 %arg) {
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+ %coerce = alloca [2 x i8], align 8, addrspace(5)
+ call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 %coerce, ptr addrspace(4) align 8 %arg, i64 16, i1 false)
+ %load.coerce = load i32, ptr addrspace(5) %coerce, align 8
+ %cmp.i = icmp slt i32 %load.coerce, 10
+ %inline_values.i = getelementptr inbounds i8, ptr addrspace(5) %coerce, i64 5
+ %ret.1 = addrspacecast ptr addrspace(5) %inline_values.i to ptr
+ %in_of_line_values.i = getelementptr inbounds i8, ptr addrspace(5) %coerce, i64 6
+ %ret.0 = addrspacecast ptr addrspace(5) %in_of_line_values.i to ptr
+ %retval.0.i = select i1 %cmp.i, ptr %ret.1, ptr %ret.0
+ call void @llvm.lifetime.start(i64 8, ptr addrspace(0) %retval.0.i)
+ ret void
+}
+
More information about the llvm-commits
mailing list