[llvm] r269945 - AMDGPU: Handle alloca promoting with null operands
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed May 18 08:57:22 PDT 2016
Author: arsenm
Date: Wed May 18 10:57:21 2016
New Revision: 269945
URL: http://llvm.org/viewvc/llvm-project?rev=269945&view=rev
Log:
AMDGPU: Handle alloca promoting with null operands
If the other pointer operand of a multi-pointer instruction (icmp, select,
phi) is a constant null, we can replace its type with the promoted (local address space) pointer type.
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll
llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll
llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp?rev=269945&r1=269944&r2=269945&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp Wed May 18 10:57:21 2016
@@ -510,6 +510,9 @@ bool AMDGPUPromoteAlloca::binaryOpIsDeri
if (Val == OtherOp)
OtherOp = Inst->getOperand(OpIdx1);
+ if (isa<ConstantPointerNull>(OtherOp))
+ return true;
+
Value *OtherObj = GetUnderlyingObject(OtherOp, *DL);
if (!isa<AllocaInst>(OtherObj))
return false;
@@ -573,6 +576,9 @@ bool AMDGPUPromoteAlloca::collectUsesWit
if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, ICmp, 0, 1))
return false;
+
+ // May need to rewrite constant operands.
+ WorkList.push_back(ICmp);
}
if (!User->getType()->isPointerTy())
@@ -713,16 +719,45 @@ void AMDGPUPromoteAlloca::handleAlloca(A
for (Value *V : WorkList) {
CallInst *Call = dyn_cast<CallInst>(V);
if (!Call) {
- Type *EltTy = V->getType()->getPointerElementType();
- PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
+ if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
+ Value *Src0 = CI->getOperand(0);
+ Type *EltTy = Src0->getType()->getPointerElementType();
+ PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
+
+ if (isa<ConstantPointerNull>(CI->getOperand(0)))
+ CI->setOperand(0, ConstantPointerNull::get(NewTy));
+
+ if (isa<ConstantPointerNull>(CI->getOperand(1)))
+ CI->setOperand(1, ConstantPointerNull::get(NewTy));
+
+ continue;
+ }
// The operand's value should be corrected on its own.
if (isa<AddrSpaceCastInst>(V))
continue;
+ Type *EltTy = V->getType()->getPointerElementType();
+ PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
+
// FIXME: It doesn't really make sense to try to do this for all
// instructions.
V->mutateType(NewTy);
+
+ // Adjust the types of any constant operands.
+ if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ if (isa<ConstantPointerNull>(SI->getOperand(1)))
+ SI->setOperand(1, ConstantPointerNull::get(NewTy));
+
+ if (isa<ConstantPointerNull>(SI->getOperand(2)))
+ SI->setOperand(2, ConstantPointerNull::get(NewTy));
+ } else if (PHINode *Phi = dyn_cast<PHINode>(V)) {
+ for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
+ if (isa<ConstantPointerNull>(Phi->getIncomingValue(I)))
+ Phi->setIncomingValue(I, ConstantPointerNull::get(NewTy));
+ }
+ }
+
continue;
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll?rev=269945&r1=269944&r2=269945&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll Wed May 18 10:57:21 2016
@@ -18,6 +18,32 @@ define void @lds_promoted_alloca_icmp_sa
ret void
}
+; CHECK-LABEL: @lds_promoted_alloca_icmp_null_rhs(
+; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_icmp_null_rhs.alloca, i32 0, i32 %{{[0-9]+}}
+; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a
+; CHECK: %cmp = icmp eq i32 addrspace(3)* %ptr0, null
+define void @lds_promoted_alloca_icmp_null_rhs(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+ %alloca = alloca [16 x i32], align 4
+ %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
+ %cmp = icmp eq i32* %ptr0, null
+ %zext = zext i1 %cmp to i32
+ store volatile i32 %zext, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: @lds_promoted_alloca_icmp_null_lhs(
+; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_icmp_null_lhs.alloca, i32 0, i32 %{{[0-9]+}}
+; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a
+; CHECK: %cmp = icmp eq i32 addrspace(3)* null, %ptr0
+define void @lds_promoted_alloca_icmp_null_lhs(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+ %alloca = alloca [16 x i32], align 4
+ %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
+ %cmp = icmp eq i32* null, %ptr0
+ %zext = zext i1 %cmp to i32
+ store volatile i32 %zext, i32 addrspace(1)* %out
+ ret void
+}
+
; CHECK-LABEL: @lds_promoted_alloca_icmp_unknown_ptr(
; CHECK: %alloca = alloca [16 x i32], align 4
; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
Modified: llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll?rev=269945&r1=269944&r2=269945&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll Wed May 18 10:57:21 2016
@@ -32,6 +32,40 @@ endif:
ret void
}
+; CHECK-LABEL: @branch_ptr_phi_alloca_null_0(
+; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %if ], [ null, %entry ]
+define void @branch_ptr_phi_alloca_null_0(i32 %a, i32 %b) #0 {
+entry:
+ %alloca = alloca [64 x i32], align 4
+ br i1 undef, label %if, label %endif
+
+if:
+ %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
+ br label %endif
+
+endif:
+ %phi.ptr = phi i32* [ %arrayidx0, %if ], [ null, %entry ]
+ store i32 0, i32* %phi.ptr, align 4
+ ret void
+}
+
+; CHECK-LABEL: @branch_ptr_phi_alloca_null_1(
+; CHECK: %phi.ptr = phi i32 addrspace(3)* [ null, %entry ], [ %arrayidx0, %if ]
+define void @branch_ptr_phi_alloca_null_1(i32 %a, i32 %b) #0 {
+entry:
+ %alloca = alloca [64 x i32], align 4
+ br i1 undef, label %if, label %endif
+
+if:
+ %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
+ br label %endif
+
+endif:
+ %phi.ptr = phi i32* [ null, %entry ], [ %arrayidx0, %if ]
+ store i32 0, i32* %phi.ptr, align 4
+ ret void
+}
+
; CHECK-LABEL: @one_phi_value(
; CHECK: getelementptr inbounds [256 x [64 x i32]], [256 x [64 x i32]] addrspace(3)* @one_phi_value.alloca, i32 0, i32 %14
; CHECK: %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %a
Modified: llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll?rev=269945&r1=269944&r2=269945&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll Wed May 18 10:57:21 2016
@@ -99,4 +99,35 @@ bb2:
ret void
}
+; CHECK-LABEL: @select_null_rhs(
+; CHECK-NOT: alloca
+; CHECK: select i1 %tmp2, double addrspace(3)* %{{[0-9]+}}, double addrspace(3)* null
+define void @select_null_rhs(double addrspace(1)* nocapture %arg, i32 %arg1) #1 {
+bb:
+ %tmp = alloca double, align 8
+ store double 0.000000e+00, double* %tmp, align 8
+ %tmp2 = icmp eq i32 %arg1, 0
+ %tmp3 = select i1 %tmp2, double* %tmp, double* null
+ store double 1.000000e+00, double* %tmp3, align 8
+ %tmp4 = load double, double* %tmp, align 8
+ store double %tmp4, double addrspace(1)* %arg
+ ret void
+}
+
+; CHECK-LABEL: @select_null_lhs(
+; CHECK-NOT: alloca
+; CHECK: select i1 %tmp2, double addrspace(3)* null, double addrspace(3)* %{{[0-9]+}}
+define void @select_null_lhs(double addrspace(1)* nocapture %arg, i32 %arg1) #1 {
+bb:
+ %tmp = alloca double, align 8
+ store double 0.000000e+00, double* %tmp, align 8
+ %tmp2 = icmp eq i32 %arg1, 0
+ %tmp3 = select i1 %tmp2, double* null, double* %tmp
+ store double 1.000000e+00, double* %tmp3, align 8
+ %tmp4 = load double, double* %tmp, align 8
+ store double %tmp4, double addrspace(1)* %arg
+ ret void
+}
+
attributes #0 = { norecurse nounwind "amdgpu-max-waves-per-eu"="1" }
+attributes #1 = { norecurse nounwind }
\ No newline at end of file
More information about the llvm-commits mailing list