[llvm] d274d64 - [AMDGPU] Check for pointer operand while refining LDS align

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 23 12:28:03 PDT 2021


Author: Stanislav Mekhanoshin
Date: 2021-06-23T12:27:55-07:00
New Revision: d274d64ef45f99387428d80a4f4b81dee91305e8

URL: https://github.com/llvm/llvm-project/commit/d274d64ef45f99387428d80a4f4b81dee91305e8
DIFF: https://github.com/llvm/llvm-project/commit/d274d64ef45f99387428d80a4f4b81dee91305e8.diff

LOG: [AMDGPU] Check for pointer operand while refining LDS align

Also skips the propagation if alignment is 1.

Differential Revision: https://reviews.llvm.org/D104796

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
    llvm/test/CodeGen/AMDGPU/lower-kernel-lds-super-align.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index a3a43bd8d407..f5cd3d1ede70 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -335,7 +335,7 @@ class AMDGPULowerModuleLDS : public ModulePass {
 
   void refineUsesAlignment(Value *Ptr, Align A, const DataLayout &DL,
                            unsigned MaxDepth = 5) {
-    if (!MaxDepth)
+    if (!MaxDepth || A == 1)
       return;
 
     for (User *U : Ptr->users()) {
@@ -344,15 +344,20 @@ class AMDGPULowerModuleLDS : public ModulePass {
         continue;
       }
       if (auto *SI = dyn_cast<StoreInst>(U)) {
-        SI->setAlignment(std::max(A, SI->getAlign()));
+        if (SI->getPointerOperand() == Ptr)
+          SI->setAlignment(std::max(A, SI->getAlign()));
         continue;
       }
       if (auto *AI = dyn_cast<AtomicRMWInst>(U)) {
-        AI->setAlignment(std::max(A, AI->getAlign()));
+        // No atomicrmw operation can currently work on pointers, but check it
+        // anyway in case one ever does, or we start processing ConstantExpr.
+        if (AI->getPointerOperand() == Ptr)
+          AI->setAlignment(std::max(A, AI->getAlign()));
         continue;
       }
       if (auto *AI = dyn_cast<AtomicCmpXchgInst>(U)) {
-        AI->setAlignment(std::max(A, AI->getAlign()));
+        if (AI->getPointerOperand() == Ptr)
+          AI->setAlignment(std::max(A, AI->getAlign()));
         continue;
       }
       if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-super-align.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-super-align.ll
index 470177ba392a..5ce7dcb36ba6 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-super-align.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-super-align.ll
@@ -6,6 +6,7 @@
 ; CHECK: %llvm.amdgcn.kernel.k1.lds.t = type { [32 x i8] }
 ; CHECK: %llvm.amdgcn.kernel.k2.lds.t = type { i16, [2 x i8], i16 }
 ; CHECK: %llvm.amdgcn.kernel.k3.lds.t = type { [32 x i64], [32 x i32] }
+; CHECK: %llvm.amdgcn.kernel.k4.lds.t = type { [2 x i32 addrspace(3)*] }
 
 ; CHECK-NOT: @lds.1
 @lds.1 = internal unnamed_addr addrspace(3) global [32 x i8] undef, align 1
@@ -17,6 +18,9 @@
 ; SUPER-ALIGN_ON:  @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t undef, align 16
 ; SUPER-ALIGN_OFF: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t undef, align 8
 
+; SUPER-ALIGN_ON:  @llvm.amdgcn.kernel.k4.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k4.lds.t undef, align 16
+; SUPER-ALIGN_OFF: @llvm.amdgcn.kernel.k4.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k4.lds.t undef, align 4
+
 ; CHECK-LABEL: @k1
 ; CHECK:  %1 = getelementptr inbounds [32 x i8], [32 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, %llvm.amdgcn.kernel.k1.lds.t addrspace(3)* @llvm.amdgcn.kernel.k1.lds, i32 0, i32 0), i32 0, i32 0
 ; CHECK:  %2 = addrspacecast i8 addrspace(3)* %1 to i8*
@@ -127,3 +131,24 @@ define amdgpu_kernel void @k3(i64 %x) {
 
   ret void
 }
+
+@lds.6 = internal unnamed_addr addrspace(3) global [2 x i32 addrspace(3)*] undef, align 4
+
+; Check that alignment is not propagated if use is not a pointer operand.
+
+; CHECK-LABEL: @k4
+; SUPER-ALIGN_ON:  store i32 undef, i32 addrspace(3)* %ptr, align 8
+; SUPER-ALIGN_OFF: store i32 undef, i32 addrspace(3)* %ptr, align 4
+; CHECK:           store i32 addrspace(3)* %ptr, i32 addrspace(3)** undef, align 4
+; SUPER-ALIGN_ON:  %val1 = cmpxchg volatile i32 addrspace(3)* %ptr, i32 1, i32 2 monotonic monotonic, align 8
+; SUPER-ALIGN_OFF: %val1 = cmpxchg volatile i32 addrspace(3)* %ptr, i32 1, i32 2 monotonic monotonic, align 4
+; CHECK:           %val2 = cmpxchg volatile i32 addrspace(3)** undef, i32 addrspace(3)* %ptr, i32 addrspace(3)* undef monotonic monotonic, align 4
+define amdgpu_kernel void @k4() {
+  %gep = getelementptr inbounds i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* bitcast ([2 x i32 addrspace(3)*] addrspace(3)* @lds.6 to i32 addrspace(3)* addrspace(3)*), i64 1
+  %ptr = bitcast i32 addrspace(3)* addrspace(3)* %gep to i32 addrspace(3)*
+  store i32 undef, i32 addrspace(3)* %ptr, align 4
+  store i32 addrspace(3)* %ptr, i32 addrspace(3)** undef, align 4
+  %val1 = cmpxchg volatile i32 addrspace(3)* %ptr, i32 1, i32 2 monotonic monotonic, align 4
+  %val2 = cmpxchg volatile i32 addrspace(3)** undef, i32 addrspace(3)* %ptr, i32 addrspace(3)* undef monotonic monotonic, align 4
+  ret void
+}


        


More information about the llvm-commits mailing list