[PATCH] D104796: [AMDGPU] Check for pointer operand while refining LDS align
    Stanislav Mekhanoshin via Phabricator via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Wed Jun 23 10:23:48 PDT 2021
    
    
  
rampitec created this revision.
rampitec added reviewers: foad, hsmhsm.
Herald added subscribers: kerbowa, jfb, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, jvesely, kzhuravl, arsenm.
rampitec requested review of this revision.
Herald added a subscriber: wdng.
Herald added a project: LLVM.
Also skips the propagation if alignment is 1.
https://reviews.llvm.org/D104796
Files:
  llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
  llvm/test/CodeGen/AMDGPU/lower-kernel-lds-super-align.ll
Index: llvm/test/CodeGen/AMDGPU/lower-kernel-lds-super-align.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/lower-kernel-lds-super-align.ll
+++ llvm/test/CodeGen/AMDGPU/lower-kernel-lds-super-align.ll
@@ -6,6 +6,7 @@
 ; CHECK: %llvm.amdgcn.kernel.k1.lds.t = type { [32 x i8] }
 ; CHECK: %llvm.amdgcn.kernel.k2.lds.t = type { i16, [2 x i8], i16 }
 ; CHECK: %llvm.amdgcn.kernel.k3.lds.t = type { [32 x i64], [32 x i32] }
+; CHECK: %llvm.amdgcn.kernel.k4.lds.t = type { [2 x i32 addrspace(3)*] }
 
 ; CHECK-NOT: @lds.1
 @lds.1 = internal unnamed_addr addrspace(3) global [32 x i8] undef, align 1
@@ -17,6 +18,9 @@
 ; SUPER-ALIGN_ON:  @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t undef, align 16
 ; SUPER-ALIGN_OFF: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t undef, align 8
 
+; SUPER-ALIGN_ON:  @llvm.amdgcn.kernel.k4.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k4.lds.t undef, align 16
+; SUPER-ALIGN_OFF: @llvm.amdgcn.kernel.k4.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k4.lds.t undef, align 4
+
 ; CHECK-LABEL: @k1
 ; CHECK:  %1 = getelementptr inbounds [32 x i8], [32 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, %llvm.amdgcn.kernel.k1.lds.t addrspace(3)* @llvm.amdgcn.kernel.k1.lds, i32 0, i32 0), i32 0, i32 0
 ; CHECK:  %2 = addrspacecast i8 addrspace(3)* %1 to i8*
@@ -127,3 +131,24 @@
 
   ret void
 }
+
+ at lds.6 = internal unnamed_addr addrspace(3) global [2 x i32 addrspace(3)*] undef, align 4
+
+; Check that aligment is not propagated if use is not a pointer operand.
+
+; CHECK-LABEL: @k4
+; SUPER-ALIGN_ON:  store i32 undef, i32 addrspace(3)* %ptr, align 8
+; SUPER-ALIGN_OFF: store i32 undef, i32 addrspace(3)* %ptr, align 4
+; CHECK:           store i32 addrspace(3)* %ptr, i32 addrspace(3)** undef, align 4
+; SUPER-ALIGN_ON:  %val1 = cmpxchg volatile i32 addrspace(3)* %ptr, i32 1, i32 2 monotonic monotonic, align 8
+; SUPER-ALIGN_OFF: %val1 = cmpxchg volatile i32 addrspace(3)* %ptr, i32 1, i32 2 monotonic monotonic, align 4
+; CHECK:           %val2 = cmpxchg volatile i32 addrspace(3)** undef, i32 addrspace(3)* %ptr, i32 addrspace(3)* undef monotonic monotonic, align 4
+define amdgpu_kernel void @k4() {
+  %gep = getelementptr inbounds i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* bitcast ([2 x i32 addrspace(3)*] addrspace(3)* @lds.6 to i32 addrspace(3)* addrspace(3)*), i64 1
+  %ptr = bitcast i32 addrspace(3)* addrspace(3)* %gep to i32 addrspace(3)*
+  store i32 undef, i32 addrspace(3)* %ptr, align 4
+  store i32 addrspace(3)* %ptr, i32 addrspace(3)** undef, align 4
+  %val1 = cmpxchg volatile i32 addrspace(3)* %ptr, i32 1, i32 2 monotonic monotonic, align 4
+  %val2 = cmpxchg volatile i32 addrspace(3)** undef, i32 addrspace(3)* %ptr, i32 addrspace(3)* undef monotonic monotonic, align 4
+  ret void
+}
Index: llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -335,7 +335,7 @@
 
   void refineUsesAlignment(Value *Ptr, Align A, const DataLayout &DL,
                            unsigned MaxDepth = 5) {
-    if (!MaxDepth)
+    if (!MaxDepth || A == 1)
       return;
 
     for (User *U : Ptr->users()) {
@@ -344,15 +344,20 @@
         continue;
       }
       if (auto *SI = dyn_cast<StoreInst>(U)) {
-        SI->setAlignment(std::max(A, SI->getAlign()));
+        if (SI->getPointerOperand() == Ptr)
+          SI->setAlignment(std::max(A, SI->getAlign()));
         continue;
       }
       if (auto *AI = dyn_cast<AtomicRMWInst>(U)) {
-        AI->setAlignment(std::max(A, AI->getAlign()));
+        // None of atomicrmw operations can work on pointers, but let's
+        // check it anyway in case it will or we will process ConstantExpr.
+        if (AI->getPointerOperand() == Ptr)
+          AI->setAlignment(std::max(A, AI->getAlign()));
         continue;
       }
       if (auto *AI = dyn_cast<AtomicCmpXchgInst>(U)) {
-        AI->setAlignment(std::max(A, AI->getAlign()));
+        if (AI->getPointerOperand() == Ptr)
+          AI->setAlignment(std::max(A, AI->getAlign()));
         continue;
       }
       if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D104796.354016.patch
Type: text/x-patch
Size: 4396 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210623/4812892a/attachment-0001.bin>
    
    
More information about the llvm-commits
mailing list