[PATCH] D104796: [AMDGPU] Check for pointer operand while refining LDS align
Stanislav Mekhanoshin via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 23 10:23:48 PDT 2021
rampitec created this revision.
rampitec added reviewers: foad, hsmhsm.
Herald added subscribers: kerbowa, jfb, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, jvesely, kzhuravl, arsenm.
rampitec requested review of this revision.
Herald added a subscriber: wdng.
Herald added a project: LLVM.
Also skips the propagation if alignment is 1.
https://reviews.llvm.org/D104796
Files:
llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
llvm/test/CodeGen/AMDGPU/lower-kernel-lds-super-align.ll
Index: llvm/test/CodeGen/AMDGPU/lower-kernel-lds-super-align.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/lower-kernel-lds-super-align.ll
+++ llvm/test/CodeGen/AMDGPU/lower-kernel-lds-super-align.ll
@@ -6,6 +6,7 @@
; CHECK: %llvm.amdgcn.kernel.k1.lds.t = type { [32 x i8] }
; CHECK: %llvm.amdgcn.kernel.k2.lds.t = type { i16, [2 x i8], i16 }
; CHECK: %llvm.amdgcn.kernel.k3.lds.t = type { [32 x i64], [32 x i32] }
+; CHECK: %llvm.amdgcn.kernel.k4.lds.t = type { [2 x i32 addrspace(3)*] }
; CHECK-NOT: @lds.1
@lds.1 = internal unnamed_addr addrspace(3) global [32 x i8] undef, align 1
@@ -17,6 +18,9 @@
; SUPER-ALIGN_ON: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t undef, align 16
; SUPER-ALIGN_OFF: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t undef, align 8
+; SUPER-ALIGN_ON: @llvm.amdgcn.kernel.k4.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k4.lds.t undef, align 16
+; SUPER-ALIGN_OFF: @llvm.amdgcn.kernel.k4.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k4.lds.t undef, align 4
+
; CHECK-LABEL: @k1
; CHECK: %1 = getelementptr inbounds [32 x i8], [32 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, %llvm.amdgcn.kernel.k1.lds.t addrspace(3)* @llvm.amdgcn.kernel.k1.lds, i32 0, i32 0), i32 0, i32 0
; CHECK: %2 = addrspacecast i8 addrspace(3)* %1 to i8*
@@ -127,3 +131,24 @@
ret void
}
+
+@lds.6 = internal unnamed_addr addrspace(3) global [2 x i32 addrspace(3)*] undef, align 4
+
+; Check that alignment is not propagated if the use is not a pointer operand.
+
+; CHECK-LABEL: @k4
+; SUPER-ALIGN_ON: store i32 undef, i32 addrspace(3)* %ptr, align 8
+; SUPER-ALIGN_OFF: store i32 undef, i32 addrspace(3)* %ptr, align 4
+; CHECK: store i32 addrspace(3)* %ptr, i32 addrspace(3)** undef, align 4
+; SUPER-ALIGN_ON: %val1 = cmpxchg volatile i32 addrspace(3)* %ptr, i32 1, i32 2 monotonic monotonic, align 8
+; SUPER-ALIGN_OFF: %val1 = cmpxchg volatile i32 addrspace(3)* %ptr, i32 1, i32 2 monotonic monotonic, align 4
+; CHECK: %val2 = cmpxchg volatile i32 addrspace(3)** undef, i32 addrspace(3)* %ptr, i32 addrspace(3)* undef monotonic monotonic, align 4
+define amdgpu_kernel void @k4() {
+ %gep = getelementptr inbounds i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* bitcast ([2 x i32 addrspace(3)*] addrspace(3)* @lds.6 to i32 addrspace(3)* addrspace(3)*), i64 1
+ %ptr = bitcast i32 addrspace(3)* addrspace(3)* %gep to i32 addrspace(3)*
+ store i32 undef, i32 addrspace(3)* %ptr, align 4
+ store i32 addrspace(3)* %ptr, i32 addrspace(3)** undef, align 4
+ %val1 = cmpxchg volatile i32 addrspace(3)* %ptr, i32 1, i32 2 monotonic monotonic, align 4
+ %val2 = cmpxchg volatile i32 addrspace(3)** undef, i32 addrspace(3)* %ptr, i32 addrspace(3)* undef monotonic monotonic, align 4
+ ret void
+}
Index: llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -335,7 +335,7 @@
void refineUsesAlignment(Value *Ptr, Align A, const DataLayout &DL,
unsigned MaxDepth = 5) {
- if (!MaxDepth)
+ if (!MaxDepth || A == 1)
return;
for (User *U : Ptr->users()) {
@@ -344,15 +344,20 @@
continue;
}
if (auto *SI = dyn_cast<StoreInst>(U)) {
- SI->setAlignment(std::max(A, SI->getAlign()));
+ if (SI->getPointerOperand() == Ptr)
+ SI->setAlignment(std::max(A, SI->getAlign()));
continue;
}
if (auto *AI = dyn_cast<AtomicRMWInst>(U)) {
- AI->setAlignment(std::max(A, AI->getAlign()));
+ // No atomicrmw operation can work on pointers, but check anyway in
+ // case that changes or we start processing ConstantExpr.
+ if (AI->getPointerOperand() == Ptr)
+ AI->setAlignment(std::max(A, AI->getAlign()));
continue;
}
if (auto *AI = dyn_cast<AtomicCmpXchgInst>(U)) {
- AI->setAlignment(std::max(A, AI->getAlign()));
+ if (AI->getPointerOperand() == Ptr)
+ AI->setAlignment(std::max(A, AI->getAlign()));
continue;
}
if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D104796.354016.patch
Type: text/x-patch
Size: 4396 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210623/4812892a/attachment-0001.bin>
More information about the llvm-commits
mailing list