[llvm] dfec702 - AMDGPU: Check for other uses when looking through casted select

Thu Jan 23 08:31:31 PST 2020

Author: Matt Arsenault
Date: 2020-01-23T11:31:24-05:00
New Revision: dfec702290e4cbd2fb965096788225ef3aac0986

URL: https://github.com/llvm/llvm-project/commit/dfec702290e4cbd2fb965096788225ef3aac0986
DIFF: https://github.com/llvm/llvm-project/commit/dfec702290e4cbd2fb965096788225ef3aac0986.diff

LOG: AMDGPU: Check for other uses when looking through casted select

Fixes mesa regression on ext_transform_feedback-max-varyings

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
    llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index f4576f10825e..3ac634b6a47e 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -552,6 +552,8 @@ bool AMDGPUCodeGenPrepare::foldBinOpIntoSelect(BinaryOperator &BO) const {
 
   CastInst *CastOp;
 
+  // TODO: Should probably try to handle some cases with multiple
+  // users. Duplicating the select may be profitable for division.
   SelectInst *Sel = findSelectThroughCast(BO.getOperand(0), CastOp);
   if (!Sel || !Sel->hasOneUse()) {
     SelOpNo = 1;
@@ -568,6 +570,8 @@ bool AMDGPUCodeGenPrepare::foldBinOpIntoSelect(BinaryOperator &BO) const {
     return false;
 
   if (CastOp) {
+    if (!CastOp->hasOneUse())
+      return false;
     CT = ConstantFoldCastOperand(CastOp->getOpcode(), CT, BO.getType(), *DL);
     CF = ConstantFoldCastOperand(CastOp->getOpcode(), CF, BO.getType(), *DL);
   }

diff  --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
index 6402cc9547ee..11bb1c27b1d5 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
@@ -492,3 +492,55 @@ define i32 @select_add_zext_select(i1 %cond) {
   %op = add i32 %trunc, 42
   ret i32 %op
 }
+
+define i32 @select_add_bitcast_select(i1 %cond) {
+; IR-LABEL: @select_add_bitcast_select(
+; IR-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], i32 1065353258, i32 1073741866
+; IR-NEXT:    ret i32 [[OP]]
+;
+; GCN-LABEL: select_add_bitcast_select:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
+; GCN-NEXT:    v_mov_b32_e32 v1, 0x4000002a
+; GCN-NEXT:    v_mov_b32_e32 v2, 0x3f80002a
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %select = select i1 %cond, float 1.0, float 2.0
+  %trunc = bitcast float %select to i32
+  %op = add i32 %trunc, 42
+  ret i32 %op
+}
+
+; If we fold through a cast, we need to ensure the cast doesn't have
+; multiple uses (here %fpext feeds both the fsub and the call).
+define <2 x half> @multi_use_cast_regression(i1 %cond) {
+; IR-LABEL: @multi_use_cast_regression(
+; IR-NEXT:    [[SELECT:%.*]] = select i1 [[COND:%.*]], half 0xH3C00, half 0xH0000
+; IR-NEXT:    [[FPEXT:%.*]] = fpext half [[SELECT]] to float
+; IR-NEXT:    [[FSUB:%.*]] = fsub nsz float 1.000000e+00, [[FPEXT]]
+; IR-NEXT:    [[CALL:%.*]] = call nsz <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[FPEXT]], float [[FSUB]])
+; IR-NEXT:    ret <2 x half> [[CALL]]
+;
+; GCN-LABEL: multi_use_cast_regression:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_and_b32_e32 v0, 1, v0
+; GCN-NEXT:    v_mov_b32_e32 v1, 0x3c00
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
+; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; GCN-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GCN-NEXT:    v_sub_f32_e32 v1, 1.0, v0
+; GCN-NEXT:    v_cvt_pkrtz_f16_f32 v0, v0, v1
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %select = select i1 %cond, half 1.000000e+00, half 0.000000e+00
+  %fpext = fpext half %select to float
+  %fsub = fsub nsz float 1.0, %fpext
+  %call = call nsz <2 x half> @llvm.amdgcn.cvt.pkrtz(float %fpext, float %fsub) #3
+  ret <2 x half> %call
+}
+
+declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #0
+
+attributes #0 = { nounwind readnone speculatable }