[llvm] InferAddressSpaces: Improve handling of instructions with multiple pointer uses (PR #101922)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 5 10:28:31 PDT 2024


https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/101922

From d81032445ecf1e9e1313f238a446d397403590aa Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 5 Aug 2024 11:25:42 +0400
Subject: [PATCH] InferAddressSpaces: Improve handling of instructions with
 multiple pointer uses

The use list iteration worked correctly for the load and store cases. The
atomic instructions happen to have the pointer operand as the last visited
use, but we rejected the instruction as a simple use after the first
encountered use and then skipped its remaining uses, so the pointer operand
was never rewritten.

Ignore the use list for the recognized load/store/atomic instructions, and just
try to directly replace the known pointer use.
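
For example, in the atomicrmw case covered by the new tests below:

  %alloca = alloca ptr, align 8, addrspace(5)
  %flat = addrspacecast ptr addrspace(5) %alloca to ptr
  %xchg = atomicrmw xchg ptr %flat, ptr %flat seq_cst, align 8

Here %flat is both the value operand and the pointer operand of the
atomicrmw. The pointer operand use of %flat is visited last, so the old
walk rejected the instruction as a simple use when it saw the value
operand and then skipped the remaining uses, leaving the pointer operand
flat. With the pointer operand replaced directly, the atomicrmw is
rewritten to operate on ptr addrspace(5) %alloca, as the updated checks
verify.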
---
 .../Transforms/Scalar/InferAddressSpaces.cpp  |  46 +++--
 .../AMDGPU/store-pointer-to-self.ll           | 161 +++++++++++++++++-
 2 files changed, 186 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 830c15249582c..90ba7e268d3dc 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -1002,31 +1002,50 @@ bool InferAddressSpacesImpl::updateAddressSpace(
   return true;
 }
 
+/// Replace operand \p OpIdx in \p Inst with \p NewVal if its current value
+/// is the same as \p OldVal.
+static bool replaceOperandIfSame(Instruction *Inst, unsigned OpIdx,
+                                 Value *OldVal, Value *NewVal) {
+  Use &U = Inst->getOperandUse(OpIdx);
+  if (U.get() == OldVal) {
+    U.set(NewVal);
+    return true;
+  }
+
+  return false;
+}
+
+template <typename InstrType>
+static bool replaceSimplePointerUse(const TargetTransformInfo &TTI,
+                                    InstrType *MemInstr, unsigned AddrSpace,
+                                    Value *OldV, Value *NewV) {
+  if (!MemInstr->isVolatile() || TTI.hasVolatileVariant(MemInstr, AddrSpace)) {
+    return replaceOperandIfSame(MemInstr, InstrType::getPointerOperandIndex(),
+                                OldV, NewV);
+  }
+
+  return false;
+}
+
 /// \p returns true if \p U is the pointer operand of a memory instruction with
 /// a single pointer operand that can have its address space changed by simply
 /// mutating the use to a new value. If the memory instruction is volatile,
 /// return true only if the target allows the memory instruction to be volatile
 /// in the new address space.
 static bool isSimplePointerUseValidToReplace(const TargetTransformInfo &TTI,
-                                             Use &U, unsigned AddrSpace) {
-  User *Inst = U.getUser();
-  unsigned OpNo = U.getOperandNo();
-
+                                             User *Inst, unsigned AddrSpace,
+                                             Value *OldV, Value *NewV) {
   if (auto *LI = dyn_cast<LoadInst>(Inst))
-    return OpNo == LoadInst::getPointerOperandIndex() &&
-           (!LI->isVolatile() || TTI.hasVolatileVariant(LI, AddrSpace));
+    return replaceSimplePointerUse(TTI, LI, AddrSpace, OldV, NewV);
 
   if (auto *SI = dyn_cast<StoreInst>(Inst))
-    return OpNo == StoreInst::getPointerOperandIndex() &&
-           (!SI->isVolatile() || TTI.hasVolatileVariant(SI, AddrSpace));
+    return replaceSimplePointerUse(TTI, SI, AddrSpace, OldV, NewV);
 
   if (auto *RMW = dyn_cast<AtomicRMWInst>(Inst))
-    return OpNo == AtomicRMWInst::getPointerOperandIndex() &&
-           (!RMW->isVolatile() || TTI.hasVolatileVariant(RMW, AddrSpace));
+    return replaceSimplePointerUse(TTI, RMW, AddrSpace, OldV, NewV);
 
   if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst))
-    return OpNo == AtomicCmpXchgInst::getPointerOperandIndex() &&
-           (!CmpX->isVolatile() || TTI.hasVolatileVariant(CmpX, AddrSpace));
+    return replaceSimplePointerUse(TTI, CmpX, AddrSpace, OldV, NewV);
 
   return false;
 }
@@ -1226,11 +1245,10 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
       I = skipToNextUser(I, E);
 
       if (isSimplePointerUseValidToReplace(
-              *TTI, U, V->getType()->getPointerAddressSpace())) {
+              *TTI, CurUser, V->getType()->getPointerAddressSpace(), V, NewV)) {
         // If V is used as the pointer operand of a compatible memory operation,
         // sets the pointer operand to NewV. This replacement does not change
         // the element type, so the resultant load/store is still valid.
-        U.set(NewV);
         continue;
       }
 
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/store-pointer-to-self.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/store-pointer-to-self.ll
index bce0e4ec1fe16..5fcf1f6b31178 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/store-pointer-to-self.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/store-pointer-to-self.ll
@@ -22,18 +22,47 @@ define void @store_flat_pointer_to_self() {
   ret void
 }
 
-; FIXME: Should be able to optimize the pointer operand to flat.
+define void @store_volatile_flat_pointer_to_self() {
+; CHECK-LABEL: define void @store_volatile_flat_pointer_to_self() {
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT:    [[FLAT:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CHECK-NEXT:    store volatile ptr [[FLAT]], ptr [[FLAT]], align 8
+; CHECK-NEXT:    call void @user(ptr [[FLAT]])
+; CHECK-NEXT:    ret void
+;
+  %alloca = alloca ptr, align 8, addrspace(5)
+  %flat = addrspacecast ptr addrspace(5) %alloca to ptr
+  store volatile ptr %flat, ptr %flat, align 8
+  call void @user(ptr %flat)
+  ret void
+}
+
 define ptr @atomicrmw_xchg_flat_pointer_to_self() {
 ; CHECK-LABEL: define ptr @atomicrmw_xchg_flat_pointer_to_self() {
 ; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT:    [[FLAT1:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CHECK-NEXT:    [[XCHG:%.*]] = atomicrmw xchg ptr addrspace(5) [[ALLOCA]], ptr [[FLAT1]] seq_cst, align 8
+; CHECK-NEXT:    call void @user(ptr [[FLAT1]])
+; CHECK-NEXT:    ret ptr [[XCHG]]
+;
+  %alloca = alloca ptr, align 8, addrspace(5)
+  %flat = addrspacecast ptr addrspace(5) %alloca to ptr
+  %xchg = atomicrmw xchg ptr %flat, ptr %flat seq_cst, align 8
+  call void @user(ptr %flat)
+  ret ptr %xchg
+}
+
+define ptr @atomicrmw_volatile_xchg_flat_pointer_to_self() {
+; CHECK-LABEL: define ptr @atomicrmw_volatile_xchg_flat_pointer_to_self() {
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
 ; CHECK-NEXT:    [[FLAT:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
-; CHECK-NEXT:    [[XCHG:%.*]] = atomicrmw xchg ptr [[FLAT]], ptr [[FLAT]] seq_cst, align 8
+; CHECK-NEXT:    [[XCHG:%.*]] = atomicrmw volatile xchg ptr [[FLAT]], ptr [[FLAT]] seq_cst, align 8
 ; CHECK-NEXT:    call void @user(ptr [[FLAT]])
 ; CHECK-NEXT:    ret ptr [[XCHG]]
 ;
   %alloca = alloca ptr, align 8, addrspace(5)
   %flat = addrspacecast ptr addrspace(5) %alloca to ptr
-  %xchg = atomicrmw xchg ptr %flat, ptr %flat seq_cst, align 8
+  %xchg = atomicrmw volatile xchg ptr %flat, ptr %flat seq_cst, align 8
   call void @user(ptr %flat)
   ret ptr %xchg
 }
@@ -42,14 +71,46 @@ define { ptr, i1 } @cmpxchg_flat_pointer_new_to_self(ptr %cmp) {
 ; CHECK-LABEL: define { ptr, i1 } @cmpxchg_flat_pointer_new_to_self(
 ; CHECK-SAME: ptr [[CMP:%.*]]) {
 ; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT:    [[FLAT1:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CHECK-NEXT:    [[CMPX:%.*]] = cmpxchg ptr addrspace(5) [[ALLOCA]], ptr [[CMP]], ptr [[FLAT1]] seq_cst seq_cst, align 8
+; CHECK-NEXT:    call void @user(ptr [[FLAT1]])
+; CHECK-NEXT:    ret { ptr, i1 } [[CMPX]]
+;
+  %alloca = alloca ptr, align 8, addrspace(5)
+  %flat = addrspacecast ptr addrspace(5) %alloca to ptr
+  %cmpx = cmpxchg ptr %flat, ptr %cmp, ptr %flat seq_cst seq_cst, align 8
+  call void @user(ptr %flat)
+  ret { ptr, i1 } %cmpx
+}
+
+define { ptr, i1 } @cmpxchg_volatile_flat_pointer_new_to_self(ptr %cmp) {
+; CHECK-LABEL: define { ptr, i1 } @cmpxchg_volatile_flat_pointer_new_to_self(
+; CHECK-SAME: ptr [[CMP:%.*]]) {
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
 ; CHECK-NEXT:    [[FLAT:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
-; CHECK-NEXT:    [[CMPX:%.*]] = cmpxchg ptr [[FLAT]], ptr [[CMP]], ptr [[FLAT]] seq_cst seq_cst, align 8
+; CHECK-NEXT:    [[CMPX:%.*]] = cmpxchg volatile ptr [[FLAT]], ptr [[CMP]], ptr [[FLAT]] seq_cst seq_cst, align 8
 ; CHECK-NEXT:    call void @user(ptr [[FLAT]])
 ; CHECK-NEXT:    ret { ptr, i1 } [[CMPX]]
 ;
   %alloca = alloca ptr, align 8, addrspace(5)
   %flat = addrspacecast ptr addrspace(5) %alloca to ptr
-  %cmpx = cmpxchg ptr %flat, ptr %cmp, ptr %flat seq_cst seq_cst, align 8
+  %cmpx = cmpxchg volatile ptr %flat, ptr %cmp, ptr %flat seq_cst seq_cst, align 8
+  call void @user(ptr %flat)
+  ret { ptr, i1 } %cmpx
+}
+
+define { ptr, i1 } @volatile_cmpxchg_flat_pointer_new_to_self(ptr %cmp) {
+; CHECK-LABEL: define { ptr, i1 } @volatile_cmpxchg_flat_pointer_new_to_self(
+; CHECK-SAME: ptr [[CMP:%.*]]) {
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT:    [[FLAT:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CHECK-NEXT:    [[CMPX:%.*]] = cmpxchg volatile ptr [[FLAT]], ptr [[CMP]], ptr [[FLAT]] seq_cst seq_cst, align 8
+; CHECK-NEXT:    call void @user(ptr [[FLAT]])
+; CHECK-NEXT:    ret { ptr, i1 } [[CMPX]]
+;
+  %alloca = alloca ptr, align 8, addrspace(5)
+  %flat = addrspacecast ptr addrspace(5) %alloca to ptr
+  %cmpx = cmpxchg volatile ptr %flat, ptr %cmp, ptr %flat seq_cst seq_cst, align 8
   call void @user(ptr %flat)
   ret { ptr, i1 } %cmpx
 }
@@ -58,14 +119,100 @@ define { ptr, i1 } @cmpxchg_flat_pointer_cmp_to_self(ptr %new) {
 ; CHECK-LABEL: define { ptr, i1 } @cmpxchg_flat_pointer_cmp_to_self(
 ; CHECK-SAME: ptr [[NEW:%.*]]) {
 ; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT:    [[FLAT1:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CHECK-NEXT:    [[CMPX:%.*]] = cmpxchg ptr addrspace(5) [[ALLOCA]], ptr [[FLAT1]], ptr [[NEW]] seq_cst seq_cst, align 8
+; CHECK-NEXT:    call void @user(ptr [[FLAT1]])
+; CHECK-NEXT:    ret { ptr, i1 } [[CMPX]]
+;
+  %alloca = alloca ptr, align 8, addrspace(5)
+  %flat = addrspacecast ptr addrspace(5) %alloca to ptr
+  %cmpx = cmpxchg ptr %flat, ptr %flat, ptr %new seq_cst seq_cst, align 8
+  call void @user(ptr %flat)
+  ret { ptr, i1 } %cmpx
+}
+
+define { ptr, i1 } @cmpxchg_flat_pointer_cmp_new_self() {
+; CHECK-LABEL: define { ptr, i1 } @cmpxchg_flat_pointer_cmp_new_self() {
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
 ; CHECK-NEXT:    [[FLAT:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
-; CHECK-NEXT:    [[CMPX:%.*]] = cmpxchg ptr [[FLAT]], ptr [[FLAT]], ptr [[NEW]] seq_cst seq_cst, align 8
+; CHECK-NEXT:    [[CMPX:%.*]] = cmpxchg ptr addrspace(5) [[ALLOCA]], ptr [[FLAT]], ptr [[FLAT]] seq_cst seq_cst, align 8
 ; CHECK-NEXT:    call void @user(ptr [[FLAT]])
 ; CHECK-NEXT:    ret { ptr, i1 } [[CMPX]]
 ;
   %alloca = alloca ptr, align 8, addrspace(5)
   %flat = addrspacecast ptr addrspace(5) %alloca to ptr
-  %cmpx = cmpxchg ptr %flat, ptr %flat, ptr %new seq_cst seq_cst, align 8
+  %cmpx = cmpxchg ptr %flat, ptr %flat, ptr %flat seq_cst seq_cst, align 8
   call void @user(ptr %flat)
   ret { ptr, i1 } %cmpx
 }
+
+define void @multi_store_flat_pointer_to_self() {
+; CHECK-LABEL: define void @multi_store_flat_pointer_to_self() {
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT:    [[FLAT:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CHECK-NEXT:    store ptr [[FLAT]], ptr addrspace(5) [[ALLOCA]], align 8
+; CHECK-NEXT:    store ptr [[FLAT]], ptr addrspace(5) [[ALLOCA]], align 8
+; CHECK-NEXT:    call void @user(ptr [[FLAT]])
+; CHECK-NEXT:    store ptr [[FLAT]], ptr addrspace(5) [[ALLOCA]], align 8
+; CHECK-NEXT:    store ptr addrspace(5) [[ALLOCA]], ptr addrspace(5) [[ALLOCA]], align 8
+; CHECK-NEXT:    ret void
+;
+  %alloca = alloca ptr, align 8, addrspace(5)
+  %flat = addrspacecast ptr addrspace(5) %alloca to ptr
+  store ptr %flat, ptr %flat, align 8
+  store ptr %flat, ptr %flat, align 8
+  call void @user(ptr %flat)
+  store ptr %flat, ptr addrspace(5) %alloca, align 8
+  store ptr addrspace(5) %alloca, ptr %flat, align 8
+  ret void
+}
+
+define void @mixed_volatile_multi_store_flat_pointer_to_self() {
+; CHECK-LABEL: define void @mixed_volatile_multi_store_flat_pointer_to_self() {
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT:    [[FLAT:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CHECK-NEXT:    store ptr [[FLAT]], ptr addrspace(5) [[ALLOCA]], align 8
+; CHECK-NEXT:    store volatile ptr [[FLAT]], ptr [[FLAT]], align 8
+; CHECK-NEXT:    store ptr [[FLAT]], ptr addrspace(5) [[ALLOCA]], align 8
+; CHECK-NEXT:    call void @user(ptr [[FLAT]])
+; CHECK-NEXT:    store ptr [[FLAT]], ptr addrspace(5) [[ALLOCA]], align 8
+; CHECK-NEXT:    store ptr addrspace(5) [[ALLOCA]], ptr addrspace(5) [[ALLOCA]], align 8
+; CHECK-NEXT:    store volatile ptr [[FLAT]], ptr [[FLAT]], align 8
+; CHECK-NEXT:    store ptr [[FLAT]], ptr addrspace(5) [[ALLOCA]], align 8
+; CHECK-NEXT:    ret void
+;
+  %alloca = alloca ptr, align 8, addrspace(5)
+  %flat = addrspacecast ptr addrspace(5) %alloca to ptr
+  store ptr %flat, ptr %flat, align 8
+  store volatile ptr %flat, ptr %flat, align 8
+  store ptr %flat, ptr %flat, align 8
+  call void @user(ptr %flat)
+  store ptr %flat, ptr addrspace(5) %alloca, align 8
+  store ptr addrspace(5) %alloca, ptr %flat, align 8
+  store volatile ptr %flat, ptr %flat, align 8
+  store ptr %flat, ptr %flat, align 8
+  ret void
+}
+
+define amdgpu_kernel void @uselist_regression_skipped_load(ptr nocapture readonly %Arg, i32 %i) {
+; CHECK-LABEL: define amdgpu_kernel void @uselist_regression_skipped_load(
+; CHECK-SAME: ptr nocapture readonly [[ARG:%.*]], i32 [[I:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[ARG_GLOBAL:%.*]] = addrspacecast ptr [[ARG]] to ptr addrspace(1)
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG_GLOBAL]], i32 [[I]]
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[P1]] to ptr
+; CHECK-NEXT:    [[P2:%.*]] = load volatile ptr, ptr [[TMP0]], align 8
+; CHECK-NEXT:    [[P2_GLOBAL:%.*]] = addrspacecast ptr [[P2]] to ptr addrspace(1)
+; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(1) [[P2_GLOBAL]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %Arg.global = addrspacecast ptr %Arg to ptr addrspace(1)
+  %Arg.flat = addrspacecast ptr addrspace(1) %Arg.global to ptr
+  %p1 = getelementptr inbounds ptr, ptr %Arg.flat, i32 %i
+  %p2 = load volatile ptr, ptr %p1, align 8
+  %p2.global = addrspacecast ptr %p2 to ptr addrspace(1)
+  %p2.flat = addrspacecast ptr addrspace(1) %p2.global to ptr
+  store float 0.000000e+00, ptr %p2.flat, align 4
+  ret void
+}


