[llvm] [ValueTracking] Do not use FMF from fcmp (PR #142266)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 1 09:36:38 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Yingwei Zheng (dtcxzyw)
<details>
<summary>Changes</summary>
This patch introduces an FMF parameter for `matchDecomposedSelectPattern` so that the fast-math flags are taken from the select instruction instead of from the fcmp.
Closes https://github.com/llvm/llvm-project/issues/137998.
Closes https://github.com/llvm/llvm-project/issues/141017.
---
Patch is 74.15 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/142266.diff
13 Files Affected:
- (modified) llvm/include/llvm/Analysis/ValueTracking.h (+2-1)
- (modified) llvm/lib/Analysis/ValueTracking.cpp (+7-6)
- (modified) llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll (+19)
- (modified) llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll (+62-76)
- (modified) llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll (+3-3)
- (modified) llvm/test/CodeGen/ARM/fp16-vminmaxnm-vector.ll (+32-32)
- (modified) llvm/test/CodeGen/ARM/fp16-vminmaxnm.ll (+44-44)
- (modified) llvm/test/CodeGen/ARM/vminmaxnm-safe.ll (+46-46)
- (modified) llvm/test/CodeGen/ARM/vminmaxnm.ll (+56-56)
- (modified) llvm/test/CodeGen/PowerPC/vec-min-max.ll (+4-4)
- (modified) llvm/test/Transforms/InstCombine/unordered-fcmp-select.ll (+25)
- (modified) llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll (+18-24)
- (modified) llvm/unittests/Analysis/ValueTrackingTest.cpp (+5-6)
``````````diff
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index d81c3f10bba90..feb7a1fa2cb35 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -889,7 +889,8 @@ inline SelectPatternResult matchSelectPattern(const Value *V, const Value *&LHS,
/// predicate and given values as its true/false operands would match.
LLVM_ABI SelectPatternResult matchDecomposedSelectPattern(
CmpInst *CmpI, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS,
- Instruction::CastOps *CastOp = nullptr, unsigned Depth = 0);
+ FastMathFlags FMF = FastMathFlags(), Instruction::CastOps *CastOp = nullptr,
+ unsigned Depth = 0);
/// Determine the pattern for predicate `X Pred Y ? X : Y`.
LLVM_ABI SelectPatternResult getSelectPattern(
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index fc19b2ccf7964..253b8bd8f7bc5 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -8905,19 +8905,20 @@ SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
Value *TrueVal = SI->getTrueValue();
Value *FalseVal = SI->getFalseValue();
- return llvm::matchDecomposedSelectPattern(CmpI, TrueVal, FalseVal, LHS, RHS,
- CastOp, Depth);
+ return llvm::matchDecomposedSelectPattern(
+ CmpI, TrueVal, FalseVal, LHS, RHS,
+ isa<FPMathOperator>(SI) ? SI->getFastMathFlags() : FastMathFlags(),
+ CastOp, Depth);
}
SelectPatternResult llvm::matchDecomposedSelectPattern(
CmpInst *CmpI, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS,
- Instruction::CastOps *CastOp, unsigned Depth) {
+ FastMathFlags FMF, Instruction::CastOps *CastOp, unsigned Depth) {
CmpInst::Predicate Pred = CmpI->getPredicate();
Value *CmpLHS = CmpI->getOperand(0);
Value *CmpRHS = CmpI->getOperand(1);
- FastMathFlags FMF;
- if (isa<FPMathOperator>(CmpI))
- FMF = CmpI->getFastMathFlags();
+ if (isa<FPMathOperator>(CmpI) && CmpI->hasNoNaNs())
+ FMF.setNoNaNs();
// Bail out early.
if (CmpI->isEquality())
diff --git a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
index 550e89f4a27f9..aec31eced397e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
@@ -53,3 +53,22 @@ define i64 @test_integer(i64 %in) {
%val = select i1 %cmp, i64 0, i64 %in
ret i64 %val
}
+
+; Make sure we don't translate it into fminnm when the nsz flag is set on the fcmp.
+define float @minnum_fcmp_nsz(float %x, float %y) {
+; CHECK-LABEL: minnum_fcmp_nsz:
+ %cmp = fcmp nnan nsz ole float %x, %y
+ %sel = select i1 %cmp, float %x, float %y
+ ret float %sel
+; CHECK-NOT: fminnm
+; CHECK: fcsel s0, s0, s1, le
+}
+
+; Make sure we translate it into fminnm when the nsz flag is set on the select.
+define float @minnum_select_nsz(float %x, float %y) {
+; CHECK-LABEL: minnum_select_nsz:
+ %cmp = fcmp nnan ole float %x, %y
+ %sel = select nsz i1 %cmp, float %x, float %y
+ ret float %sel
+; CHECK: fminnm s0, s0, s1
+}
diff --git a/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll b/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll
index f7bd5f8d5bfb4..39428dc448018 100644
--- a/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll
@@ -355,17 +355,15 @@ define <2 x float> @v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag(<2 x float> %a, <
; GFX7-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0
-; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX9-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag:
@@ -375,12 +373,7 @@ define <2 x float> @v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag(<2 x float> %a, <
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2
-; GFX12-NEXT: s_wait_alu 0xfffd
-; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
-; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3
-; GFX12-NEXT: s_wait_alu 0xfffd
-; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX12-NEXT: v_dual_min_num_f32 v0, v0, v2 :: v_dual_min_num_f32 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
%cmp = fcmp ule <2 x float> %a, %b
%val = select nnan nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
@@ -499,17 +492,15 @@ define <2 x float> @v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag(<2 x float> %a, <
; GFX7-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0
-; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
-; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v1, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
+; GFX9-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX9-NEXT: v_max_f32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag:
@@ -519,12 +510,7 @@ define <2 x float> @v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag(<2 x float> %a, <
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2
-; GFX12-NEXT: s_wait_alu 0xfffd
-; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
-; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3
-; GFX12-NEXT: s_wait_alu 0xfffd
-; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v2 :: v_dual_max_num_f32 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
%cmp = fcmp uge <2 x float> %a, %b
%val = select nnan nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
@@ -673,10 +659,10 @@ define half @v_test_fmin_legacy_ule_f16_nnan_nsz_flag(half %a, half %b) {
; GFX7-LABEL: v_test_fmin_legacy_ule_f16_nnan_nsz_flag:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
@@ -852,10 +838,10 @@ define half @v_test_fmax_legacy_uge_f16_nnan_nsz_flag(half %a, half %b) {
; GFX7-LABEL: v_test_fmax_legacy_uge_f16_nnan_nsz_flag:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
@@ -1097,16 +1083,16 @@ define <2 x half> @v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag(<2 x half> %a, <2
; GFX7-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0
-; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag:
@@ -1337,16 +1323,16 @@ define <2 x half> @v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag(<2 x half> %a, <2
; GFX7-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0
-; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag:
@@ -1667,26 +1653,26 @@ define <4 x half> @v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag(<4 x half> %a, <4
; GFX7-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
-; GFX7-NEXT: v_min_legacy_f32_e32 v0, v4, v0
-; GFX7-NEXT: v_min_legacy_f32_e32 v1, v5, v1
-; GFX7-NEXT: v_min_legacy_f32_e32 v2, v6, v2
-; GFX7-NEXT: v_min_legacy_f32_e32 v3, v7, v3
+; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX7-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX7-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX7-NEXT: v_min_f32_e32 v3, v3, v7
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag:
@@ -2009,26 +1995,26 @@ define <4 x half> @v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag(<4 x half> %a, <4
; GFX7-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
-; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
-; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
-; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
-; GFX7-NEXT: v_max_legacy_f32_e32 v0, v4, v0
-; GFX7-NEXT: v_max_legacy_f32_e32 v1, v5, v1
-; GFX7-NEXT: v_max_legacy_f32_e32 v2, v6, v2
-; GFX7-NEXT: v_max_legacy_f32_e32 v3, v7, v3
+; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
+; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
+; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag:
diff --git a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
index 996b46c51ab36..52fe5ce1a8a5f 100644
--- a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
+++ b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
@@ -517,9 +517,9 @@ define half @fp16_vminmaxnm_e_0(half %a) {
; CHECK-NEXT: .short 0x0000 @ half 0
entry:
%cmp1 = fcmp nsz ole half 0., %a
- %cond1 = select i1 %cmp1, half 0., half %a
+ %cond1 = select nsz i1 %cmp1, half 0., half %a
%cmp2 = fcmp nsz uge half 0., %cond1
- %cond2 = select i1 %cmp2, half 0., half %cond1
+ %cond2 = select nsz i1 %cmp2, half 0., half %cond1
ret half %cond2
}
@@ -540,7 +540,7 @@ define half @fp16_vminmaxnm_e_neg0(half %a) {
; CHECK-NEXT: .short 0x8000 @ half -0
entry:
%cmp1 = fcmp nsz ule half -0., %a
- %cond1 = select i1 %cmp1, half -0., half %a
+ %cond1 = select nsz i1 %cmp1, half -0., half %a
%cmp2 = fcmp nsz oge half -0., %cond1
%cond2 = select i1 %cmp2, half -0., half %cond1
ret half %cond2
diff --git a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-vector.ll b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-vector.ll
index 6a61bb594b430..0b41c738080a6 100644
--- a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-vector.ll
+++ b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-vector.ll
@@ -10,7 +10,7 @@ define <4 x half> @test1(<4 x half> %A, <4 x half> %B) {
; CHECK: vmaxnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ogt <4 x half> %A, %B
- %tmp4 = select <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+ %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
ret <4 x half> %tmp4
}
@@ -19,7 +19,7 @@ define <4 x half> @test2(<4 x half> %A, <4 x half> %B) {
; CHECK: vminnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ogt <4 x half> %A, %B
- %tmp4 = select <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+ %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
ret <4 x half> %tmp4
}
@@ -28,7 +28,7 @@ define <4 x half> @test3(<4 x half> %A, <4 x half> %B) {
; CHECK: vminnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast oge <4 x half> %A, %B
- %tmp4 = select <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+ %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
ret <4 x half> %tmp4
}
@@ -37,7 +37,7 @@ define <4 x half> @test4(<4 x half> %A, <4 x half> %B) {
; CHECK: vmaxnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast oge <4 x half> %A, %B
- %tmp4 = select <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+ %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
ret <4 x half> %tmp4
}
@@ -46,7 +46,7 @@ define <4 x half> @test5(<4 x half> %A, <4 x half> %B) {
; CHECK: vminnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast olt <4 x half> %A, %B
- %tmp4 = select <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+ %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
ret <4 x half> %tmp4
}
@@ -55,7 +55,7 @@ define <4 x half> @test6(<4 x half> %A, <4 x half> %B) {
; CHECK: vmaxnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast olt <4 x half> %A, %B
- %tmp4 = select <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+ %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
ret <4 x half> %tmp4
}
@@ -64,7 +64,7 @@ define <4 x half> @test7(<4 x half> %A, <4 x half> %B) {
; CHECK: vminnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ole <4 x half> %A, %B
- %tmp4 = select <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+ %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
ret <4 x half> %tmp4
}
@@ -73,7 +73,7 @@ define <4 x half> @test8(<4 x half> %A, <4 x half> %B) {
; CHECK: vmaxnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ole <4 x half> %A, %B
- %tmp4 = select <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+ %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
ret <4 x half> %tmp4
}
@@ -84,7 +84,7 @@ define <4 x half> @test11(<4 x half> %A, <4 x half> %B) {
; CHECK: vmaxnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ugt <4 x half> %A, %B
- %tmp4 = select <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+ %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
ret <4 x half> %tmp4
}
@@ -93,7 +93,7 @@ define <4 x half> @test12(<4 x half> %A, <4 x half> %B) {
; CHECK: vminnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ugt <4 x half> %A, %B
- %tmp4 = select <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+ %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
ret <4 x half> %tmp4
}
@@ -102,7 +102,7 @@ define <4 x half> @test13(<4 x half> %A, <4 x half> %B) {
; CHECK: vminnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast uge <4 x half> %A, %B
- %tmp4 = select <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+ %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
ret <4 x half> %tmp4
}
@@ -111,7 +111,7 @@ define <4 x half> @test14(<4 x half> %A, <4 x half> %B) {
; CHECK: vmaxnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast uge <4 x half> %A, %B
- %tmp4 = select <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+ %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
ret <4 x half> %tmp4
}
@@ -120,7 +120,7 @@ define <4 x half> @test15(<4 x half> %A, <4 x half> %B) {
; CHECK: vminnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ult <4 x half> %A, %B
- %tmp4 = select <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+ %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
ret <4 x half> %tmp4
}
@@ -129,7 +129,7 @@ define <4 x half> @test16(<4 x half> %A, <4 x half> %B) {
; CHECK: vmaxnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ult <4 x half> %A, %B
- %tmp4 = select <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
+ %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
ret <4 x half> %tmp4
}
@@ -138,7 +138,7 @@ define <4 x half> @test17(<4 x half> %A, <4 x half> %B) {
; CHECK: vminnm.f16 d0, d0, d1
; CHECK-NEXT: bx lr
%tmp3 = fcmp fast ule <4 x half> %A, %B
- %tmp4 = select <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
+ %tmp4 = select nsz <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
ret <4 x half> %tmp4
}
@@ -147,7 +147,7 @@ define <4 x h...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/142266
More information about the llvm-commits
mailing list