[llvm] InferAddressSpaces: Improve handling of instructions with multiple pointer uses (PR #101922)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 7 11:33:51 PDT 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/101922
From 0348ef2d70df41b02ec3d2a3fc751e8e216523f5 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 5 Aug 2024 11:25:42 +0400
Subject: [PATCH] InferAddressSpaces: Improve handling of instructions with
multiple pointer uses
The use-list iteration worked correctly for the load and store cases. The atomic
instructions happen to have the pointer value as the last visited operand, but we
gave up on treating the instruction as a simple pointer use after checking only the
first encountered use.
Ignore the use list for the recognized load/store/atomic instructions, and instead
try to replace the known pointer operand directly.
---
.../Transforms/Scalar/InferAddressSpaces.cpp | 64 ++++---
.../AMDGPU/store-pointer-to-self.ll | 161 +++++++++++++++++-
2 files changed, 194 insertions(+), 31 deletions(-)
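
As a sketch of the failure mode this fixes (it mirrors the
atomicrmw_xchg_flat_pointer_to_self test updated below), consider a pointer that
feeds the same atomic instruction twice, as both the value operand and the
pointer operand:

  %alloca = alloca ptr, align 8, addrspace(5)
  %flat = addrspacecast ptr addrspace(5) %alloca to ptr
  %xchg = atomicrmw xchg ptr %flat, ptr %flat seq_cst, align 8

Previously only the first encountered use of %flat in the atomicrmw was checked
against the pointer operand index; that use happens to be the value operand, so
the instruction was rejected and the pointer operand was never rewritten. With
this patch the pass queries the instruction for its pointer operand index
directly, so the pointer operand can still be rewritten to the addrspace(5)
alloca (as the updated CHECK lines show), independent of the use visitation
order.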
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 0bb7517d784a32..a5571409bba682 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -1022,31 +1022,52 @@ bool InferAddressSpacesImpl::updateAddressSpace(
return true;
}
-/// \p returns true if \p U is the pointer operand of a memory instruction with
-/// a single pointer operand that can have its address space changed by simply
-/// mutating the use to a new value. If the memory instruction is volatile,
-/// return true only if the target allows the memory instruction to be volatile
-/// in the new address space.
-static bool isSimplePointerUseValidToReplace(const TargetTransformInfo &TTI,
- Use &U, unsigned AddrSpace) {
- User *Inst = U.getUser();
- unsigned OpNo = U.getOperandNo();
+/// Replace operand \p OpIdx in \p Inst with \p NewVal if its current value is
+/// \p OldVal.
+static bool replaceOperandIfSame(Instruction *Inst, unsigned OpIdx,
+ Value *OldVal, Value *NewVal) {
+ Use &U = Inst->getOperandUse(OpIdx);
+ if (U.get() == OldVal) {
+ U.set(NewVal);
+ return true;
+ }
+
+ return false;
+}
+template <typename InstrType>
+static bool replaceSimplePointerUse(const TargetTransformInfo &TTI,
+ InstrType *MemInstr, unsigned AddrSpace,
+ Value *OldV, Value *NewV) {
+ if (!MemInstr->isVolatile() || TTI.hasVolatileVariant(MemInstr, AddrSpace)) {
+ return replaceOperandIfSame(MemInstr, InstrType::getPointerOperandIndex(),
+ OldV, NewV);
+ }
+
+ return false;
+}
+
+/// If \p OldV is used as the pointer operand of a compatible memory operation
+/// \p Inst, replaces the pointer operand with \p NewV.
+///
+/// This covers memory instructions with a single pointer operand that can have
+/// its address space changed by simply mutating the use to a new value.
+///
+/// \returns true if the replacement was made.
+static bool replaceIfSimplePointerUse(const TargetTransformInfo &TTI,
+ User *Inst, unsigned AddrSpace,
+ Value *OldV, Value *NewV) {
if (auto *LI = dyn_cast<LoadInst>(Inst))
- return OpNo == LoadInst::getPointerOperandIndex() &&
- (!LI->isVolatile() || TTI.hasVolatileVariant(LI, AddrSpace));
+ return replaceSimplePointerUse(TTI, LI, AddrSpace, OldV, NewV);
if (auto *SI = dyn_cast<StoreInst>(Inst))
- return OpNo == StoreInst::getPointerOperandIndex() &&
- (!SI->isVolatile() || TTI.hasVolatileVariant(SI, AddrSpace));
+ return replaceSimplePointerUse(TTI, SI, AddrSpace, OldV, NewV);
if (auto *RMW = dyn_cast<AtomicRMWInst>(Inst))
- return OpNo == AtomicRMWInst::getPointerOperandIndex() &&
- (!RMW->isVolatile() || TTI.hasVolatileVariant(RMW, AddrSpace));
+ return replaceSimplePointerUse(TTI, RMW, AddrSpace, OldV, NewV);
if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst))
- return OpNo == AtomicCmpXchgInst::getPointerOperandIndex() &&
- (!CmpX->isVolatile() || TTI.hasVolatileVariant(CmpX, AddrSpace));
+ return replaceSimplePointerUse(TTI, CmpX, AddrSpace, OldV, NewV);
return false;
}
@@ -1245,14 +1266,9 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
// to the next instruction.
I = skipToNextUser(I, E);
- if (isSimplePointerUseValidToReplace(
- *TTI, U, V->getType()->getPointerAddressSpace())) {
- // If V is used as the pointer operand of a compatible memory operation,
- // sets the pointer operand to NewV. This replacement does not change
- // the element type, so the resultant load/store is still valid.
- U.set(NewV);
+ unsigned AddrSpace = V->getType()->getPointerAddressSpace();
+ if (replaceIfSimplePointerUse(*TTI, CurUser, AddrSpace, V, NewV))
continue;
- }
// Skip if the current user is the new value itself.
if (CurUser == NewV)
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/store-pointer-to-self.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/store-pointer-to-self.ll
index bce0e4ec1fe16d..5fcf1f6b311785 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/store-pointer-to-self.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/store-pointer-to-self.ll
@@ -22,18 +22,47 @@ define void @store_flat_pointer_to_self() {
ret void
}
-; FIXME: Should be able to optimize the pointer operand to flat.
+define void @store_volatile_flat_pointer_to_self() {
+; CHECK-LABEL: define void @store_volatile_flat_pointer_to_self() {
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT: [[FLAT:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CHECK-NEXT: store volatile ptr [[FLAT]], ptr [[FLAT]], align 8
+; CHECK-NEXT: call void @user(ptr [[FLAT]])
+; CHECK-NEXT: ret void
+;
+ %alloca = alloca ptr, align 8, addrspace(5)
+ %flat = addrspacecast ptr addrspace(5) %alloca to ptr
+ store volatile ptr %flat, ptr %flat, align 8
+ call void @user(ptr %flat)
+ ret void
+}
+
define ptr @atomicrmw_xchg_flat_pointer_to_self() {
; CHECK-LABEL: define ptr @atomicrmw_xchg_flat_pointer_to_self() {
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT: [[FLAT1:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CHECK-NEXT: [[XCHG:%.*]] = atomicrmw xchg ptr addrspace(5) [[ALLOCA]], ptr [[FLAT1]] seq_cst, align 8
+; CHECK-NEXT: call void @user(ptr [[FLAT1]])
+; CHECK-NEXT: ret ptr [[XCHG]]
+;
+ %alloca = alloca ptr, align 8, addrspace(5)
+ %flat = addrspacecast ptr addrspace(5) %alloca to ptr
+ %xchg = atomicrmw xchg ptr %flat, ptr %flat seq_cst, align 8
+ call void @user(ptr %flat)
+ ret ptr %xchg
+}
+
+define ptr @atomicrmw_volatile_xchg_flat_pointer_to_self() {
+; CHECK-LABEL: define ptr @atomicrmw_volatile_xchg_flat_pointer_to_self() {
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
; CHECK-NEXT: [[FLAT:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
-; CHECK-NEXT: [[XCHG:%.*]] = atomicrmw xchg ptr [[FLAT]], ptr [[FLAT]] seq_cst, align 8
+; CHECK-NEXT: [[XCHG:%.*]] = atomicrmw volatile xchg ptr [[FLAT]], ptr [[FLAT]] seq_cst, align 8
; CHECK-NEXT: call void @user(ptr [[FLAT]])
; CHECK-NEXT: ret ptr [[XCHG]]
;
%alloca = alloca ptr, align 8, addrspace(5)
%flat = addrspacecast ptr addrspace(5) %alloca to ptr
- %xchg = atomicrmw xchg ptr %flat, ptr %flat seq_cst, align 8
+ %xchg = atomicrmw volatile xchg ptr %flat, ptr %flat seq_cst, align 8
call void @user(ptr %flat)
ret ptr %xchg
}
@@ -42,14 +71,46 @@ define { ptr, i1 } @cmpxchg_flat_pointer_new_to_self(ptr %cmp) {
; CHECK-LABEL: define { ptr, i1 } @cmpxchg_flat_pointer_new_to_self(
; CHECK-SAME: ptr [[CMP:%.*]]) {
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT: [[FLAT1:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CHECK-NEXT: [[CMPX:%.*]] = cmpxchg ptr addrspace(5) [[ALLOCA]], ptr [[CMP]], ptr [[FLAT1]] seq_cst seq_cst, align 8
+; CHECK-NEXT: call void @user(ptr [[FLAT1]])
+; CHECK-NEXT: ret { ptr, i1 } [[CMPX]]
+;
+ %alloca = alloca ptr, align 8, addrspace(5)
+ %flat = addrspacecast ptr addrspace(5) %alloca to ptr
+ %cmpx = cmpxchg ptr %flat, ptr %cmp, ptr %flat seq_cst seq_cst, align 8
+ call void @user(ptr %flat)
+ ret { ptr, i1 } %cmpx
+}
+
+define { ptr, i1 } @cmpxchg_volatile_flat_pointer_new_to_self(ptr %cmp) {
+; CHECK-LABEL: define { ptr, i1 } @cmpxchg_volatile_flat_pointer_new_to_self(
+; CHECK-SAME: ptr [[CMP:%.*]]) {
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
; CHECK-NEXT: [[FLAT:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
-; CHECK-NEXT: [[CMPX:%.*]] = cmpxchg ptr [[FLAT]], ptr [[CMP]], ptr [[FLAT]] seq_cst seq_cst, align 8
+; CHECK-NEXT: [[CMPX:%.*]] = cmpxchg volatile ptr [[FLAT]], ptr [[CMP]], ptr [[FLAT]] seq_cst seq_cst, align 8
; CHECK-NEXT: call void @user(ptr [[FLAT]])
; CHECK-NEXT: ret { ptr, i1 } [[CMPX]]
;
%alloca = alloca ptr, align 8, addrspace(5)
%flat = addrspacecast ptr addrspace(5) %alloca to ptr
- %cmpx = cmpxchg ptr %flat, ptr %cmp, ptr %flat seq_cst seq_cst, align 8
+ %cmpx = cmpxchg volatile ptr %flat, ptr %cmp, ptr %flat seq_cst seq_cst, align 8
+ call void @user(ptr %flat)
+ ret { ptr, i1 } %cmpx
+}
+
+define { ptr, i1 } @volatile_cmpxchg_flat_pointer_new_to_self(ptr %cmp) {
+; CHECK-LABEL: define { ptr, i1 } @volatile_cmpxchg_flat_pointer_new_to_self(
+; CHECK-SAME: ptr [[CMP:%.*]]) {
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT: [[FLAT:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CHECK-NEXT: [[CMPX:%.*]] = cmpxchg volatile ptr [[FLAT]], ptr [[CMP]], ptr [[FLAT]] seq_cst seq_cst, align 8
+; CHECK-NEXT: call void @user(ptr [[FLAT]])
+; CHECK-NEXT: ret { ptr, i1 } [[CMPX]]
+;
+ %alloca = alloca ptr, align 8, addrspace(5)
+ %flat = addrspacecast ptr addrspace(5) %alloca to ptr
+ %cmpx = cmpxchg volatile ptr %flat, ptr %cmp, ptr %flat seq_cst seq_cst, align 8
call void @user(ptr %flat)
ret { ptr, i1 } %cmpx
}
@@ -58,14 +119,100 @@ define { ptr, i1 } @cmpxchg_flat_pointer_cmp_to_self(ptr %new) {
; CHECK-LABEL: define { ptr, i1 } @cmpxchg_flat_pointer_cmp_to_self(
; CHECK-SAME: ptr [[NEW:%.*]]) {
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT: [[FLAT1:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CHECK-NEXT: [[CMPX:%.*]] = cmpxchg ptr addrspace(5) [[ALLOCA]], ptr [[FLAT1]], ptr [[NEW]] seq_cst seq_cst, align 8
+; CHECK-NEXT: call void @user(ptr [[FLAT1]])
+; CHECK-NEXT: ret { ptr, i1 } [[CMPX]]
+;
+ %alloca = alloca ptr, align 8, addrspace(5)
+ %flat = addrspacecast ptr addrspace(5) %alloca to ptr
+ %cmpx = cmpxchg ptr %flat, ptr %flat, ptr %new seq_cst seq_cst, align 8
+ call void @user(ptr %flat)
+ ret { ptr, i1 } %cmpx
+}
+
+define { ptr, i1 } @cmpxchg_flat_pointer_cmp_new_self() {
+; CHECK-LABEL: define { ptr, i1 } @cmpxchg_flat_pointer_cmp_new_self() {
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
; CHECK-NEXT: [[FLAT:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
-; CHECK-NEXT: [[CMPX:%.*]] = cmpxchg ptr [[FLAT]], ptr [[FLAT]], ptr [[NEW]] seq_cst seq_cst, align 8
+; CHECK-NEXT: [[CMPX:%.*]] = cmpxchg ptr addrspace(5) [[ALLOCA]], ptr [[FLAT]], ptr [[FLAT]] seq_cst seq_cst, align 8
; CHECK-NEXT: call void @user(ptr [[FLAT]])
; CHECK-NEXT: ret { ptr, i1 } [[CMPX]]
;
%alloca = alloca ptr, align 8, addrspace(5)
%flat = addrspacecast ptr addrspace(5) %alloca to ptr
- %cmpx = cmpxchg ptr %flat, ptr %flat, ptr %new seq_cst seq_cst, align 8
+ %cmpx = cmpxchg ptr %flat, ptr %flat, ptr %flat seq_cst seq_cst, align 8
call void @user(ptr %flat)
ret { ptr, i1 } %cmpx
}
+
+define void @multi_store_flat_pointer_to_self() {
+; CHECK-LABEL: define void @multi_store_flat_pointer_to_self() {
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT: [[FLAT:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CHECK-NEXT: store ptr [[FLAT]], ptr addrspace(5) [[ALLOCA]], align 8
+; CHECK-NEXT: store ptr [[FLAT]], ptr addrspace(5) [[ALLOCA]], align 8
+; CHECK-NEXT: call void @user(ptr [[FLAT]])
+; CHECK-NEXT: store ptr [[FLAT]], ptr addrspace(5) [[ALLOCA]], align 8
+; CHECK-NEXT: store ptr addrspace(5) [[ALLOCA]], ptr addrspace(5) [[ALLOCA]], align 8
+; CHECK-NEXT: ret void
+;
+ %alloca = alloca ptr, align 8, addrspace(5)
+ %flat = addrspacecast ptr addrspace(5) %alloca to ptr
+ store ptr %flat, ptr %flat, align 8
+ store ptr %flat, ptr %flat, align 8
+ call void @user(ptr %flat)
+ store ptr %flat, ptr addrspace(5) %alloca, align 8
+ store ptr addrspace(5) %alloca, ptr %flat, align 8
+ ret void
+}
+
+define void @mixed_volatile_multi_store_flat_pointer_to_self() {
+; CHECK-LABEL: define void @mixed_volatile_multi_store_flat_pointer_to_self() {
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT: [[FLAT:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CHECK-NEXT: store ptr [[FLAT]], ptr addrspace(5) [[ALLOCA]], align 8
+; CHECK-NEXT: store volatile ptr [[FLAT]], ptr [[FLAT]], align 8
+; CHECK-NEXT: store ptr [[FLAT]], ptr addrspace(5) [[ALLOCA]], align 8
+; CHECK-NEXT: call void @user(ptr [[FLAT]])
+; CHECK-NEXT: store ptr [[FLAT]], ptr addrspace(5) [[ALLOCA]], align 8
+; CHECK-NEXT: store ptr addrspace(5) [[ALLOCA]], ptr addrspace(5) [[ALLOCA]], align 8
+; CHECK-NEXT: store volatile ptr [[FLAT]], ptr [[FLAT]], align 8
+; CHECK-NEXT: store ptr [[FLAT]], ptr addrspace(5) [[ALLOCA]], align 8
+; CHECK-NEXT: ret void
+;
+ %alloca = alloca ptr, align 8, addrspace(5)
+ %flat = addrspacecast ptr addrspace(5) %alloca to ptr
+ store ptr %flat, ptr %flat, align 8
+ store volatile ptr %flat, ptr %flat, align 8
+ store ptr %flat, ptr %flat, align 8
+ call void @user(ptr %flat)
+ store ptr %flat, ptr addrspace(5) %alloca, align 8
+ store ptr addrspace(5) %alloca, ptr %flat, align 8
+ store volatile ptr %flat, ptr %flat, align 8
+ store ptr %flat, ptr %flat, align 8
+ ret void
+}
+
+define amdgpu_kernel void @uselist_regression_skipped_load(ptr nocapture readonly %Arg, i32 %i) {
+; CHECK-LABEL: define amdgpu_kernel void @uselist_regression_skipped_load(
+; CHECK-SAME: ptr nocapture readonly [[ARG:%.*]], i32 [[I:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ARG_GLOBAL:%.*]] = addrspacecast ptr [[ARG]] to ptr addrspace(1)
+; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG_GLOBAL]], i32 [[I]]
+; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[P1]] to ptr
+; CHECK-NEXT: [[P2:%.*]] = load volatile ptr, ptr [[TMP0]], align 8
+; CHECK-NEXT: [[P2_GLOBAL:%.*]] = addrspacecast ptr [[P2]] to ptr addrspace(1)
+; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(1) [[P2_GLOBAL]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %Arg.global = addrspacecast ptr %Arg to ptr addrspace(1)
+ %Arg.flat = addrspacecast ptr addrspace(1) %Arg.global to ptr
+ %p1 = getelementptr inbounds ptr, ptr %Arg.flat, i32 %i
+ %p2 = load volatile ptr, ptr %p1, align 8
+ %p2.global = addrspacecast ptr %p2 to ptr addrspace(1)
+ %p2.flat = addrspacecast ptr addrspace(1) %p2.global to ptr
+ store float 0.000000e+00, ptr %p2.flat, align 4
+ ret void
+}