[llvm] [ValueTracking] Do not use FMF from fcmp (PR #142266)
Yingwei Zheng via llvm-commits
llvm-commits at lists.llvm.org
Sat May 31 04:02:05 PDT 2025
https://github.com/dtcxzyw created https://github.com/llvm/llvm-project/pull/142266
Closes https://github.com/llvm/llvm-project/issues/141017.
From 8947c32197e33abe332187650e50cc3985456e4b Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sat, 31 May 2025 19:01:13 +0800
Subject: [PATCH] [ValueTracking] Do not use FMF from fcmp
---
llvm/include/llvm/Analysis/ValueTracking.h | 3 +-
llvm/lib/Analysis/ValueTracking.cpp | 13 +-
.../AMDGPU/select-flags-to-fmin-fmax.ll | 138 ++++++++----------
llvm/test/CodeGen/PowerPC/vec-min-max.ll | 8 +-
.../AArch64/predicated-reduction.ll | 42 +++---
llvm/unittests/Analysis/ValueTrackingTest.cpp | 11 +-
6 files changed, 98 insertions(+), 117 deletions(-)
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index d81c3f10bba90..feb7a1fa2cb35 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -889,7 +889,8 @@ inline SelectPatternResult matchSelectPattern(const Value *V, const Value *&LHS,
/// predicate and given values as its true/false operands would match.
LLVM_ABI SelectPatternResult matchDecomposedSelectPattern(
CmpInst *CmpI, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS,
- Instruction::CastOps *CastOp = nullptr, unsigned Depth = 0);
+ FastMathFlags FMF = FastMathFlags(), Instruction::CastOps *CastOp = nullptr,
+ unsigned Depth = 0);
/// Determine the pattern for predicate `X Pred Y ? X : Y`.
LLVM_ABI SelectPatternResult getSelectPattern(
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index fc19b2ccf7964..253b8bd8f7bc5 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -8905,19 +8905,20 @@ SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
Value *TrueVal = SI->getTrueValue();
Value *FalseVal = SI->getFalseValue();
- return llvm::matchDecomposedSelectPattern(CmpI, TrueVal, FalseVal, LHS, RHS,
- CastOp, Depth);
+ return llvm::matchDecomposedSelectPattern(
+ CmpI, TrueVal, FalseVal, LHS, RHS,
+ isa<FPMathOperator>(SI) ? SI->getFastMathFlags() : FastMathFlags(),
+ CastOp, Depth);
}
SelectPatternResult llvm::matchDecomposedSelectPattern(
CmpInst *CmpI, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS,
- Instruction::CastOps *CastOp, unsigned Depth) {
+ FastMathFlags FMF, Instruction::CastOps *CastOp, unsigned Depth) {
CmpInst::Predicate Pred = CmpI->getPredicate();
Value *CmpLHS = CmpI->getOperand(0);
Value *CmpRHS = CmpI->getOperand(1);
- FastMathFlags FMF;
- if (isa<FPMathOperator>(CmpI))
- FMF = CmpI->getFastMathFlags();
+ if (isa<FPMathOperator>(CmpI) && CmpI->hasNoNaNs())
+ FMF.setNoNaNs();
// Bail out early.
if (CmpI->isEquality())
diff --git a/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll b/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll
index f7bd5f8d5bfb4..39428dc448018 100644
--- a/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll
@@ -355,17 +355,15 @@ define <2 x float> @v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag(<2 x float> %a, <
; GFX7-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0
-; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX9-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag:
@@ -375,12 +373,7 @@ define <2 x float> @v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag(<2 x float> %a, <
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2
-; GFX12-NEXT: s_wait_alu 0xfffd
-; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
-; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3
-; GFX12-NEXT: s_wait_alu 0xfffd
-; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX12-NEXT: v_dual_min_num_f32 v0, v0, v2 :: v_dual_min_num_f32 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
%cmp = fcmp ule <2 x float> %a, %b
%val = select nnan nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
@@ -499,17 +492,15 @@ define <2 x float> @v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag(<2 x float> %a, <
; GFX7-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0
-; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX9-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX9-NEXT: v_max_f32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag:
@@ -519,12 +510,7 @@ define <2 x float> @v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag(<2 x float> %a, <
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2
-; GFX12-NEXT: s_wait_alu 0xfffd
-; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
-; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3
-; GFX12-NEXT: s_wait_alu 0xfffd
-; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v2 :: v_dual_max_num_f32 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
%cmp = fcmp uge <2 x float> %a, %b
%val = select nnan nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
@@ -673,10 +659,10 @@ define half @v_test_fmin_legacy_ule_f16_nnan_nsz_flag(half %a, half %b) {
; GFX7-LABEL: v_test_fmin_legacy_ule_f16_nnan_nsz_flag:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
@@ -852,10 +838,10 @@ define half @v_test_fmax_legacy_uge_f16_nnan_nsz_flag(half %a, half %b) {
; GFX7-LABEL: v_test_fmax_legacy_uge_f16_nnan_nsz_flag:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
@@ -1097,16 +1083,16 @@ define <2 x half> @v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag(<2 x half> %a, <2
; GFX7-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0
-; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag:
@@ -1337,16 +1323,16 @@ define <2 x half> @v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag(<2 x half> %a, <2
; GFX7-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0
-; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag:
@@ -1667,26 +1653,26 @@ define <4 x half> @v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag(<4 x half> %a, <4
; GFX7-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
-; GFX7-NEXT: v_min_legacy_f32_e32 v0, v4, v0
-; GFX7-NEXT: v_min_legacy_f32_e32 v1, v5, v1
-; GFX7-NEXT: v_min_legacy_f32_e32 v2, v6, v2
-; GFX7-NEXT: v_min_legacy_f32_e32 v3, v7, v3
+; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX7-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX7-NEXT: v_min_f32_e32 v3, v3, v7
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag:
@@ -2009,26 +1995,26 @@ define <4 x half> @v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag(<4 x half> %a, <4
; GFX7-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
-; GFX7-NEXT: v_max_legacy_f32_e32 v0, v4, v0
-; GFX7-NEXT: v_max_legacy_f32_e32 v1, v5, v1
-; GFX7-NEXT: v_max_legacy_f32_e32 v2, v6, v2
-; GFX7-NEXT: v_max_legacy_f32_e32 v3, v7, v3
+; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag:
diff --git a/llvm/test/CodeGen/PowerPC/vec-min-max.ll b/llvm/test/CodeGen/PowerPC/vec-min-max.ll
index f204827005e29..8124fde2667dd 100644
--- a/llvm/test/CodeGen/PowerPC/vec-min-max.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-min-max.ll
@@ -79,7 +79,7 @@ define <4 x float> @getsmaxf32(<4 x float> %a, <4 x float> %b) {
; CHECK-NEXT: blr
entry:
%0 = fcmp nnan nsz oge <4 x float> %a, %b
- %1 = select <4 x i1> %0, <4 x float> %a, <4 x float> %b
+ %1 = select nsz <4 x i1> %0, <4 x float> %a, <4 x float> %b
ret <4 x float> %1
}
@@ -90,7 +90,7 @@ define <2 x double> @getsmaxf64(<2 x double> %a, <2 x double> %b) {
; CHECK-NEXT: blr
entry:
%0 = fcmp nnan nsz oge <2 x double> %a, %b
- %1 = select <2 x i1> %0, <2 x double> %a, <2 x double> %b
+ %1 = select nsz <2 x i1> %0, <2 x double> %a, <2 x double> %b
ret <2 x double> %1
}
@@ -171,7 +171,7 @@ define <4 x float> @getsminf32(<4 x float> %a, <4 x float> %b) {
; CHECK-NEXT: blr
entry:
%0 = fcmp nnan nsz ole <4 x float> %a, %b
- %1 = select <4 x i1> %0, <4 x float> %a, <4 x float> %b
+ %1 = select nsz <4 x i1> %0, <4 x float> %a, <4 x float> %b
ret <4 x float> %1
}
@@ -182,7 +182,7 @@ define <2 x double> @getsminf64(<2 x double> %a, <2 x double> %b) {
; CHECK-NEXT: blr
entry:
%0 = fcmp nnan nsz ole <2 x double> %a, %b
- %1 = select <2 x i1> %0, <2 x double> %a, <2 x double> %b
+ %1 = select nsz <2 x i1> %0, <2 x double> %a, <2 x double> %b
ret <2 x double> %1
}
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll
index 57f68e4fc55f7..c7098d2ce96ce 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll
@@ -13,7 +13,7 @@ define nofpclass(nan inf) double @monte_simple(i32 noundef %nblocks, i32 noundef
; CHECK: [[FOR_BODY_PREHEADER]]:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[RAND_BLOCK_LENGTH]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[RAND_BLOCK_LENGTH]], 8
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY_PREHEADER23:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY_PREHEADER22:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483640
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[Y]], i64 0
@@ -41,10 +41,8 @@ define nofpclass(nan inf) double @monte_simple(i32 noundef %nblocks, i32 noundef
; CHECK-NEXT: [[TMP9:%.*]] = fcmp fast ogt <4 x double> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = fmul fast <4 x double> [[TMP6]], [[TMP6]]
; CHECK-NEXT: [[TMP11:%.*]] = fmul fast <4 x double> [[TMP7]], [[TMP7]]
-; CHECK-NEXT: [[TMP24:%.*]] = fcmp fast ole <4 x double> [[TMP6]], splat (double -0.000000e+00)
-; CHECK-NEXT: [[TMP25:%.*]] = fcmp fast ole <4 x double> [[TMP7]], splat (double -0.000000e+00)
-; CHECK-NEXT: [[TMP12:%.*]] = select nnan ninf <4 x i1> [[TMP24]], <4 x double> splat (double -0.000000e+00), <4 x double> [[TMP6]]
-; CHECK-NEXT: [[TMP13:%.*]] = select nnan ninf <4 x i1> [[TMP25]], <4 x double> splat (double -0.000000e+00), <4 x double> [[TMP7]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP8]], <4 x double> [[TMP6]], <4 x double> splat (double -0.000000e+00)
+; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP9]], <4 x double> [[TMP7]], <4 x double> splat (double -0.000000e+00)
; CHECK-NEXT: [[TMP14]] = fadd reassoc arcp contract afn <4 x double> [[VEC_PHI16]], [[TMP12]]
; CHECK-NEXT: [[TMP15]] = fadd reassoc arcp contract afn <4 x double> [[VEC_PHI17]], [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP8]], <4 x double> [[TMP10]], <4 x double> splat (double -0.000000e+00)
@@ -60,16 +58,16 @@ define nofpclass(nan inf) double @monte_simple(i32 noundef %nblocks, i32 noundef
; CHECK-NEXT: [[BIN_RDX21:%.*]] = fadd reassoc arcp contract afn <4 x double> [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[TMP22:%.*]] = tail call reassoc arcp contract afn double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> [[BIN_RDX21]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
-; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY_PREHEADER23]]
-; CHECK: [[FOR_BODY_PREHEADER23]]:
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY_PREHEADER22]]
+; CHECK: [[FOR_BODY_PREHEADER22]]:
; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
-; CHECK-NEXT: [[V1_012_PH:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[TMP21]], %[[MIDDLE_BLOCK]] ]
-; CHECK-NEXT: [[V0_011_PH:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[TMP22]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[V1_011_PH:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[TMP21]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[V0_010_PH:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[TMP22]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER23]] ]
-; CHECK-NEXT: [[V1_012:%.*]] = phi double [ [[V1_2:%.*]], %[[FOR_BODY]] ], [ [[V1_012_PH]], %[[FOR_BODY_PREHEADER23]] ]
-; CHECK-NEXT: [[V0_011:%.*]] = phi double [ [[V0_2:%.*]], %[[FOR_BODY]] ], [ [[V0_011_PH]], %[[FOR_BODY_PREHEADER23]] ]
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER22]] ]
+; CHECK-NEXT: [[V1_012:%.*]] = phi double [ [[V1_2:%.*]], %[[FOR_BODY]] ], [ [[V1_011_PH]], %[[FOR_BODY_PREHEADER22]] ]
+; CHECK-NEXT: [[V0_011:%.*]] = phi double [ [[V0_2:%.*]], %[[FOR_BODY]] ], [ [[V0_010_PH]], %[[FOR_BODY_PREHEADER22]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[SAMPLES]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[CONV:%.*]] = fpext float [[TMP0]] to double
@@ -77,8 +75,7 @@ define nofpclass(nan inf) double @monte_simple(i32 noundef %nblocks, i32 noundef
; CHECK-NEXT: [[SUB:%.*]] = fsub fast double [[MUL]], [[Z]]
; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast ogt double [[SUB]], 0.000000e+00
; CHECK-NEXT: [[MUL3:%.*]] = fmul fast double [[SUB]], [[SUB]]
-; CHECK-NEXT: [[DOTINV:%.*]] = fcmp fast ole double [[SUB]], -0.000000e+00
-; CHECK-NEXT: [[ADD8:%.*]] = select nnan ninf i1 [[DOTINV]], double -0.000000e+00, double [[SUB]]
+; CHECK-NEXT: [[ADD8:%.*]] = select i1 [[CMP1]], double [[SUB]], double -0.000000e+00
; CHECK-NEXT: [[V0_2]] = fadd reassoc arcp contract afn double [[V0_011]], [[ADD8]]
; CHECK-NEXT: [[ADD4:%.*]] = select i1 [[CMP1]], double [[MUL3]], double -0.000000e+00
; CHECK-NEXT: [[V1_2]] = fadd reassoc arcp contract afn double [[V1_012]], [[ADD4]]
@@ -232,10 +229,8 @@ define nofpclass(nan inf) double @monte_exp(i32 noundef %nblocks, i32 noundef %R
; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast ogt <4 x double> [[TMP11]], zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = fmul fast <4 x double> [[TMP10]], [[TMP10]]
; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <4 x double> [[TMP11]], [[TMP11]]
-; CHECK-NEXT: [[TMP28:%.*]] = fcmp fast ole <4 x double> [[TMP10]], splat (double -0.000000e+00)
-; CHECK-NEXT: [[TMP29:%.*]] = fcmp fast ole <4 x double> [[TMP11]], splat (double -0.000000e+00)
-; CHECK-NEXT: [[TMP16:%.*]] = select nnan ninf <4 x i1> [[TMP28]], <4 x double> splat (double -0.000000e+00), <4 x double> [[TMP10]]
-; CHECK-NEXT: [[TMP17:%.*]] = select nnan ninf <4 x i1> [[TMP29]], <4 x double> splat (double -0.000000e+00), <4 x double> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP12]], <4 x double> [[TMP10]], <4 x double> splat (double -0.000000e+00)
+; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP13]], <4 x double> [[TMP11]], <4 x double> splat (double -0.000000e+00)
; CHECK-NEXT: [[TMP18]] = fadd reassoc arcp contract afn <4 x double> [[VEC_PHI32]], [[TMP16]]
; CHECK-NEXT: [[TMP19]] = fadd reassoc arcp contract afn <4 x double> [[VEC_PHI33]], [[TMP17]]
; CHECK-NEXT: [[TMP20:%.*]] = select <4 x i1> [[TMP12]], <4 x double> [[TMP14]], <4 x double> splat (double -0.000000e+00)
@@ -253,13 +248,13 @@ define nofpclass(nan inf) double @monte_exp(i32 noundef %nblocks, i32 noundef %R
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]], label %[[FOR_BODY3_US_PREHEADER]]
; CHECK: [[FOR_BODY3_US_PREHEADER]]:
; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_US]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
-; CHECK-NEXT: [[V1_116_US_PH:%.*]] = phi double [ [[V1_021_US]], %[[FOR_BODY_US]] ], [ [[TMP25]], %[[MIDDLE_BLOCK]] ]
-; CHECK-NEXT: [[V0_115_US_PH:%.*]] = phi double [ [[V0_020_US]], %[[FOR_BODY_US]] ], [ [[TMP26]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[V1_114_US_PH:%.*]] = phi double [ [[V1_021_US]], %[[FOR_BODY_US]] ], [ [[TMP25]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[V0_113_US_PH:%.*]] = phi double [ [[V0_020_US]], %[[FOR_BODY_US]] ], [ [[TMP26]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label %[[FOR_BODY3_US:.*]]
; CHECK: [[FOR_BODY3_US]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY3_US]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY3_US_PREHEADER]] ]
-; CHECK-NEXT: [[V1_116_US:%.*]] = phi double [ [[V1_2_US:%.*]], %[[FOR_BODY3_US]] ], [ [[V1_116_US_PH]], %[[FOR_BODY3_US_PREHEADER]] ]
-; CHECK-NEXT: [[V0_115_US:%.*]] = phi double [ [[V0_2_US:%.*]], %[[FOR_BODY3_US]] ], [ [[V0_115_US_PH]], %[[FOR_BODY3_US_PREHEADER]] ]
+; CHECK-NEXT: [[V1_116_US:%.*]] = phi double [ [[V1_2_US:%.*]], %[[FOR_BODY3_US]] ], [ [[V1_114_US_PH]], %[[FOR_BODY3_US_PREHEADER]] ]
+; CHECK-NEXT: [[V0_115_US:%.*]] = phi double [ [[V0_2_US:%.*]], %[[FOR_BODY3_US]] ], [ [[V0_113_US_PH]], %[[FOR_BODY3_US_PREHEADER]] ]
; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds nuw float, ptr [[SAMPLES]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX_US]], align 4
; CHECK-NEXT: [[CONV_US:%.*]] = fpext float [[TMP0]] to double
@@ -268,8 +263,7 @@ define nofpclass(nan inf) double @monte_exp(i32 noundef %nblocks, i32 noundef %R
; CHECK-NEXT: [[SUB_US:%.*]] = fsub fast double [[MUL_US]], [[Z]]
; CHECK-NEXT: [[CMP4_US:%.*]] = fcmp fast ogt double [[SUB_US]], 0.000000e+00
; CHECK-NEXT: [[ADD7_US:%.*]] = fmul fast double [[SUB_US]], [[SUB_US]]
-; CHECK-NEXT: [[DOTINV_US:%.*]] = fcmp fast ole double [[SUB_US]], -0.000000e+00
-; CHECK-NEXT: [[ADD12_US:%.*]] = select nnan ninf i1 [[DOTINV_US]], double -0.000000e+00, double [[SUB_US]]
+; CHECK-NEXT: [[ADD12_US:%.*]] = select i1 [[CMP4_US]], double [[SUB_US]], double -0.000000e+00
; CHECK-NEXT: [[V0_2_US]] = fadd reassoc arcp contract afn double [[V0_115_US]], [[ADD12_US]]
; CHECK-NEXT: [[ADD7_US1:%.*]] = select i1 [[CMP4_US]], double [[ADD7_US]], double -0.000000e+00
; CHECK-NEXT: [[V1_2_US]] = fadd reassoc arcp contract afn double [[V1_116_US]], [[ADD7_US1]]
diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp b/llvm/unittests/Analysis/ValueTrackingTest.cpp
index a5050542b8186..8343afd63bed5 100644
--- a/llvm/unittests/Analysis/ValueTrackingTest.cpp
+++ b/llvm/unittests/Analysis/ValueTrackingTest.cpp
@@ -202,12 +202,11 @@ TEST_F(MatchSelectPatternTest, FMinConstantZero) {
}
TEST_F(MatchSelectPatternTest, FMinConstantZeroNsz) {
- parseAssembly(
- "define float @test(float %a) {\n"
- " %1 = fcmp nsz ole float %a, 0.0\n"
- " %A = select i1 %1, float %a, float 0.0\n"
- " ret float %A\n"
- "}\n");
+ parseAssembly("define float @test(float %a) {\n"
+ " %1 = fcmp nsz ole float %a, 0.0\n"
+ " %A = select nsz i1 %1, float %a, float 0.0\n"
+ " ret float %A\n"
+ "}\n");
// But this should be, because we've ignored signed zeroes.
expectPattern({SPF_FMINNUM, SPNB_RETURNS_OTHER, true});
}
More information about the llvm-commits mailing list