[llvm] dfec702 - AMDGPU: Check for other uses when looking through casted select
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 23 08:31:31 PST 2020
Author: Matt Arsenault
Date: 2020-01-23T11:31:24-05:00
New Revision: dfec702290e4cbd2fb965096788225ef3aac0986
URL: https://github.com/llvm/llvm-project/commit/dfec702290e4cbd2fb965096788225ef3aac0986
DIFF: https://github.com/llvm/llvm-project/commit/dfec702290e4cbd2fb965096788225ef3aac0986.diff
LOG: AMDGPU: Check for other uses when looking through casted select
Fixes a Mesa regression on ext_transform_feedback-max-varyings.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index f4576f10825e..3ac634b6a47e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -552,6 +552,8 @@ bool AMDGPUCodeGenPrepare::foldBinOpIntoSelect(BinaryOperator &BO) const {
CastInst *CastOp;
+ // TODO: Should probably try to handle some cases with multiple
+ // users. Duplicating the select may be profitable for division.
SelectInst *Sel = findSelectThroughCast(BO.getOperand(0), CastOp);
if (!Sel || !Sel->hasOneUse()) {
SelOpNo = 1;
@@ -568,6 +570,8 @@ bool AMDGPUCodeGenPrepare::foldBinOpIntoSelect(BinaryOperator &BO) const {
return false;
if (CastOp) {
+ if (!CastOp->hasOneUse())
+ return false;
CT = ConstantFoldCastOperand(CastOp->getOpcode(), CT, BO.getType(), *DL);
CF = ConstantFoldCastOperand(CastOp->getOpcode(), CF, BO.getType(), *DL);
}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
index 6402cc9547ee..11bb1c27b1d5 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
@@ -492,3 +492,55 @@ define i32 @select_add_zext_select(i1 %cond) {
%op = add i32 %trunc, 42
ret i32 %op
}
+
+define i32 @select_add_bitcast_select(i1 %cond) {
+; IR-LABEL: @select_add_bitcast_select(
+; IR-NEXT: [[OP:%.*]] = select i1 [[COND:%.*]], i32 1065353258, i32 1073741866
+; IR-NEXT: ret i32 [[OP]]
+;
+; GCN-LABEL: select_add_bitcast_select:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_and_b32_e32 v0, 1, v0
+; GCN-NEXT: v_mov_b32_e32 v1, 0x4000002a
+; GCN-NEXT: v_mov_b32_e32 v2, 0x3f80002a
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
+; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %select = select i1 %cond, float 1.0, float 2.0
+ %trunc = bitcast float %select to i32
+ %op = add i32 %trunc, 42
+ ret i32 %op
+}
+
+; If we fold through a cast, we need to ensure it doesn't have
+; multiple uses.
+define <2 x half> @multi_use_cast_regression(i1 %cond) {
+; IR-LABEL: @multi_use_cast_regression(
+; IR-NEXT: [[SELECT:%.*]] = select i1 [[COND:%.*]], half 0xH3C00, half 0xH0000
+; IR-NEXT: [[FPEXT:%.*]] = fpext half [[SELECT]] to float
+; IR-NEXT: [[FSUB:%.*]] = fsub nsz float 1.000000e+00, [[FPEXT]]
+; IR-NEXT: [[CALL:%.*]] = call nsz <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[FPEXT]], float [[FSUB]])
+; IR-NEXT: ret <2 x half> [[CALL]]
+;
+; GCN-LABEL: multi_use_cast_regression:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_and_b32_e32 v0, 1, v0
+; GCN-NEXT: v_mov_b32_e32 v1, 0x3c00
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
+; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GCN-NEXT: v_sub_f32_e32 v1, 1.0, v0
+; GCN-NEXT: v_cvt_pkrtz_f16_f32 v0, v0, v1
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %select = select i1 %cond, half 1.000000e+00, half 0.000000e+00
+ %fpext = fpext half %select to float
+ %fsub = fsub nsz float 1.0, %fpext
+ %call = call nsz <2 x half> @llvm.amdgcn.cvt.pkrtz(float %fpext, float %fsub) #3
+ ret <2 x half> %call
+}
+
+declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #0
+
+attributes #0 = { nounwind readnone speculatable }
More information about the llvm-commits mailing list