[llvm] [SDAG] Reverse the canonicalization of isInf/isNanOrInf (PR #81404)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 11 01:24:34 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Yingwei Zheng (dtcxzyw)
<details>
<summary>Changes</summary>
In commit https://github.com/llvm/llvm-project/commit/2b582440c16c72b6b021ea5c212ceda3bdfb2b9b, the isInf/isNanOrInf idiom was canonicalized into fabs+fcmp for better analysis/codegen (see also the discussion in https://github.com/llvm/llvm-project/pull/76338).
This patch reverses the fabs+fcmp idiom back into `is.fpclass` during SelectionDAG building. If `is.fpclass` is not supported by the target, it will be expanded by TLI.
Fixes the regression introduced by https://github.com/llvm/llvm-project/commit/2b582440c16c72b6b021ea5c212ceda3bdfb2b9b, as reported in https://github.com/llvm/llvm-project/pull/80414#issuecomment-1936374206.
---
Patch is 71.66 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/81404.diff
9 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+45-25)
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h (+1)
- (added) llvm/test/CodeGen/AArch64/fpclass-test.ll (+126)
- (modified) llvm/test/CodeGen/AArch64/isinf.ll (+7-15)
- (modified) llvm/test/CodeGen/AMDGPU/fp-classify.ll (+114-82)
- (modified) llvm/test/CodeGen/AMDGPU/fract-match.ll (+130-129)
- (added) llvm/test/CodeGen/RISCV/fpclass-test.ll (+111)
- (modified) llvm/test/CodeGen/X86/compare-inf.ll (+85-27)
- (added) llvm/test/CodeGen/X86/fpclass-test.ll (+202)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 5ce1013f30fd1b..22ceca33195cab 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3467,12 +3467,50 @@ void SelectionDAGBuilder::visitICmp(const User &I) {
setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
}
+SDValue SelectionDAGBuilder::lowerIsFpClass(Value *ClassVal,
+ FPClassTest ClassTest) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const DataLayout &DL = DAG.getDataLayout();
+ SDLoc sdl = getCurSDLoc();
+
+ EVT DestVT =
+ TLI.getValueType(DL, CmpInst::makeCmpResultType(ClassVal->getType()));
+ EVT ArgVT = TLI.getValueType(DL, ClassVal->getType());
+ MachineFunction &MF = DAG.getMachineFunction();
+ const Function &F = MF.getFunction();
+ SDValue Op = getValue(ClassVal);
+ SDNodeFlags Flags;
+ Flags.setNoFPExcept(!F.getAttributes().hasFnAttr(llvm::Attribute::StrictFP));
+ // If ISD::IS_FPCLASS should be expanded, do it right now, because the
+ // expansion can use illegal types. Making expansion early allows
+ // legalizing these types prior to selection.
+ if (!TLI.isOperationLegalOrCustom(ISD::IS_FPCLASS, ArgVT))
+ return TLI.expandIS_FPCLASS(DestVT, Op, ClassTest, Flags, sdl, DAG);
+
+ SDValue Check = DAG.getTargetConstant(ClassTest, sdl, MVT::i32);
+ return DAG.getNode(ISD::IS_FPCLASS, sdl, DestVT, {Op, Check}, Flags);
+}
+
void SelectionDAGBuilder::visitFCmp(const User &I) {
FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
- if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
+ if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I)) {
predicate = FC->getPredicate();
- else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
+
+ // Reverse the canonicalization if it is a FP class test
+ auto ShouldReverseTransform = [](FPClassTest ClassTest) {
+ return ClassTest == fcInf || ClassTest == (fcInf | fcNan);
+ };
+ auto [ClassVal, ClassTest] =
+ fcmpToClassTest(predicate, *FC->getParent()->getParent(),
+ FC->getOperand(0), FC->getOperand(1));
+ if (ClassVal && (ShouldReverseTransform(ClassTest) ||
+ ShouldReverseTransform(~ClassTest))) {
+ setValue(&I, lowerIsFpClass(ClassVal, ClassTest));
+ return;
+ }
+ } else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
predicate = FCmpInst::Predicate(FC->getPredicate());
+
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
@@ -6666,29 +6704,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DAG.setRoot(Res.getValue(0));
return;
case Intrinsic::is_fpclass: {
- const DataLayout DLayout = DAG.getDataLayout();
- EVT DestVT = TLI.getValueType(DLayout, I.getType());
- EVT ArgVT = TLI.getValueType(DLayout, I.getArgOperand(0)->getType());
- FPClassTest Test = static_cast<FPClassTest>(
- cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
- MachineFunction &MF = DAG.getMachineFunction();
- const Function &F = MF.getFunction();
- SDValue Op = getValue(I.getArgOperand(0));
- SDNodeFlags Flags;
- Flags.setNoFPExcept(
- !F.getAttributes().hasFnAttr(llvm::Attribute::StrictFP));
- // If ISD::IS_FPCLASS should be expanded, do it right now, because the
- // expansion can use illegal types. Making expansion early allows
- // legalizing these types prior to selection.
- if (!TLI.isOperationLegalOrCustom(ISD::IS_FPCLASS, ArgVT)) {
- SDValue Result = TLI.expandIS_FPCLASS(DestVT, Op, Test, Flags, sdl, DAG);
- setValue(&I, Result);
- return;
- }
-
- SDValue Check = DAG.getTargetConstant(Test, sdl, MVT::i32);
- SDValue V = DAG.getNode(ISD::IS_FPCLASS, sdl, DestVT, {Op, Check}, Flags);
- setValue(&I, V);
+ setValue(&I,
+ lowerIsFpClass(
+ I.getArgOperand(0),
+ static_cast<FPClassTest>(
+ cast<ConstantInt>(I.getArgOperand(1))->getZExtValue())));
return;
}
case Intrinsic::get_fpenv: {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 47657313cb6a3b..dfc9369117c79d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -700,6 +700,7 @@ class SelectionDAGBuilder {
MCSymbol *&BeginLabel);
SDValue lowerEndEH(SDValue Chain, const InvokeInst *II,
const BasicBlock *EHPadBB, MCSymbol *BeginLabel);
+ SDValue lowerIsFpClass(Value *ClassVal, FPClassTest ClassTest);
};
/// This struct represents the registers (physical or virtual)
diff --git a/llvm/test/CodeGen/AArch64/fpclass-test.ll b/llvm/test/CodeGen/AArch64/fpclass-test.ll
new file mode 100644
index 00000000000000..b549722690afdd
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fpclass-test.ll
@@ -0,0 +1,126 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+
+define i1 @test_is_inf_or_nan(double %arg) {
+; CHECK-LABEL: test_is_inf_or_nan:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: mov x8, #9218868437227405311 // =0x7fefffffffffffff
+; CHECK-NEXT: and x9, x9, #0x7fffffffffffffff
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: cset w0, gt
+; CHECK-NEXT: ret
+ %abs = tail call double @llvm.fabs.f64(double %arg)
+ %ret = fcmp ueq double %abs, 0x7FF0000000000000
+ ret i1 %ret
+}
+
+define i1 @test_is_not_inf_or_nan(double %arg) {
+; CHECK-LABEL: test_is_not_inf_or_nan:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000
+; CHECK-NEXT: and x9, x9, #0x7fffffffffffffff
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: cset w0, lt
+; CHECK-NEXT: ret
+ %abs = tail call double @llvm.fabs.f64(double %arg)
+ %ret = fcmp one double %abs, 0x7FF0000000000000
+ ret i1 %ret
+}
+
+define i1 @test_is_inf(double %arg) {
+; CHECK-LABEL: test_is_inf:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabs d0, d0
+; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000
+; CHECK-NEXT: fmov d1, x8
+; CHECK-NEXT: fcmp d0, d1
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %abs = tail call double @llvm.fabs.f64(double %arg)
+ %ret = fcmp oeq double %abs, 0x7FF0000000000000
+ ret i1 %ret
+}
+
+define i1 @test_is_not_inf(double %arg) {
+; CHECK-LABEL: test_is_not_inf:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabs d0, d0
+; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000
+; CHECK-NEXT: fmov d1, x8
+; CHECK-NEXT: fcmp d0, d1
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %abs = tail call double @llvm.fabs.f64(double %arg)
+ %ret = fcmp une double %abs, 0x7FF0000000000000
+ ret i1 %ret
+}
+
+define i1 @test_fp128_is_inf_or_nan(fp128 %arg) {
+; CHECK-LABEL: test_fp128_is_inf_or_nan:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #9223090561878065151 // =0x7ffeffffffffffff
+; CHECK-NEXT: str q0, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: ldr x9, [sp, #8]
+; CHECK-NEXT: and x9, x9, #0x7fffffffffffffff
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: cset w0, gt
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ret
+ %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+ %ret = fcmp ueq fp128 %abs, 0xL00000000000000007FFF000000000000
+ ret i1 %ret
+}
+
+define i1 @test_fp128_is_not_inf_or_nan(fp128 %arg) {
+; CHECK-LABEL: test_fp128_is_not_inf_or_nan:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #9223090561878065152 // =0x7fff000000000000
+; CHECK-NEXT: str q0, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: ldr x9, [sp, #8]
+; CHECK-NEXT: and x9, x9, #0x7fffffffffffffff
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: cset w0, lt
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ret
+ %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+ %ret = fcmp one fp128 %abs, 0xL00000000000000007FFF000000000000
+ ret i1 %ret
+}
+
+define i1 @test_fp128_is_inf(fp128 %arg) {
+; CHECK-LABEL: test_fp128_is_inf:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str q0, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: ldp x9, x8, [sp], #16
+; CHECK-NEXT: and x8, x8, #0x7fffffffffffffff
+; CHECK-NEXT: eor x8, x8, #0x7fff000000000000
+; CHECK-NEXT: orr x8, x9, x8
+; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+ %ret = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000
+ ret i1 %ret
+}
+
+define i1 @test_fp128_is_not_inf(fp128 %arg) {
+; CHECK-LABEL: test_fp128_is_not_inf:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str q0, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: ldp x9, x8, [sp], #16
+; CHECK-NEXT: and x8, x8, #0x7fffffffffffffff
+; CHECK-NEXT: eor x8, x8, #0x7fff000000000000
+; CHECK-NEXT: orr x8, x9, x8
+; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+ %ret = fcmp une fp128 %abs, 0xL00000000000000007FFF000000000000
+ ret i1 %ret
+}
diff --git a/llvm/test/CodeGen/AArch64/isinf.ll b/llvm/test/CodeGen/AArch64/isinf.ll
index 458bd7eeba16cf..834417b98743a8 100644
--- a/llvm/test/CodeGen/AArch64/isinf.ll
+++ b/llvm/test/CodeGen/AArch64/isinf.ll
@@ -58,22 +58,14 @@ define i32 @replace_isinf_call_f64(double %x) {
define i32 @replace_isinf_call_f128(fp128 %x) {
; CHECK-LABEL: replace_isinf_call_f128:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #32
-; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: str q0, [sp]
-; CHECK-NEXT: ldrb w8, [sp, #15]
-; CHECK-NEXT: and w8, w8, #0x7f
-; CHECK-NEXT: strb w8, [sp, #15]
-; CHECK-NEXT: adrp x8, .LCPI3_0
-; CHECK-NEXT: ldr q0, [sp]
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
-; CHECK-NEXT: bl __eqtf2
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: str q0, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: ldp x9, x8, [sp], #16
+; CHECK-NEXT: and x8, x8, #0x7fffffffffffffff
+; CHECK-NEXT: eor x8, x8, #0x7fff000000000000
+; CHECK-NEXT: orr x8, x9, x8
+; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
%abs = tail call fp128 @llvm.fabs.f128(fp128 %x)
%cmpinf = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000
diff --git a/llvm/test/CodeGen/AMDGPU/fp-classify.ll b/llvm/test/CodeGen/AMDGPU/fp-classify.ll
index 6fa7df913812a3..ed9ce4d62383b1 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-classify.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp-classify.ll
@@ -61,10 +61,10 @@ define amdgpu_kernel void @test_not_isinf_pattern_0(ptr addrspace(1) nocapture %
; SI-NEXT: s_load_dword s0, s[0:1], 0xb
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
-; SI-NEXT: v_mov_b32_e32 v0, 0x7f800000
+; SI-NEXT: v_mov_b32_e32 v0, 0x207
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_cmp_nlg_f32_e64 s[0:1], |s0|, v0
-; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; SI-NEXT: v_cmp_class_f32_e32 vcc, s0, v0
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
@@ -72,11 +72,11 @@ define amdgpu_kernel void @test_not_isinf_pattern_0(ptr addrspace(1) nocapture %
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x7f800000
+; VI-NEXT: v_mov_b32_e32 v0, 0x207
; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_nlg_f32_e64 s[2:3], |s2|, v0
+; VI-NEXT: v_cmp_class_f32_e32 vcc, s2, v0
; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[2:3]
+; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
@@ -88,7 +88,7 @@ define amdgpu_kernel void @test_not_isinf_pattern_0(ptr addrspace(1) nocapture %
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: v_cmp_nlg_f32_e64 s2, 0x7f800000, |s2|
+; GFX11-NEXT: v_cmp_class_f32_e64 s2, s2, 0x207
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -143,25 +143,29 @@ define amdgpu_kernel void @test_isfinite_pattern_0(ptr addrspace(1) nocapture %o
; SI-LABEL: test_isfinite_pattern_0:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT: s_load_dword s0, s[0:1], 0xb
+; SI-NEXT: s_load_dword s2, s[0:1], 0xb
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
-; SI-NEXT: v_mov_b32_e32 v0, 0x1f8
+; SI-NEXT: v_mov_b32_e32 v0, 0x1fb
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_cmp_class_f32_e32 vcc, s0, v0
-; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-NEXT: v_cmp_o_f32_e64 s[0:1], s2, s2
+; SI-NEXT: v_cmp_class_f32_e32 vcc, s2, v0
+; SI-NEXT: s_and_b64 s[0:1], s[0:1], vcc
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: test_isfinite_pattern_0:
; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x1f8
+; VI-NEXT: v_mov_b32_e32 v0, 0x1fb
; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_class_f32_e32 vcc, s2, v0
+; VI-NEXT: v_cmp_o_f32_e64 s[2:3], s4, s4
+; VI-NEXT: v_cmp_class_f32_e32 vcc, s4, v0
+; VI-NEXT: s_and_b64 s[2:3], s[2:3], vcc
; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[2:3]
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
@@ -173,8 +177,10 @@ define amdgpu_kernel void @test_isfinite_pattern_0(ptr addrspace(1) nocapture %o
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: v_cmp_class_f32_e64 s2, s2, 0x1f8
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cmp_o_f32_e64 s3, s2, s2
+; GFX11-NEXT: v_cmp_class_f32_e64 s2, s2, 0x1fb
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: s_and_b32 s2, s3, s2
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_nop 0
@@ -349,13 +355,13 @@ define amdgpu_kernel void @test_isfinite_not_pattern_2(ptr addrspace(1) nocaptur
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
-; SI-NEXT: v_mov_b32_e32 v0, 0x7f800000
+; SI-NEXT: v_mov_b32_e32 v0, 0x1fb
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s4, s0
; SI-NEXT: s_mov_b32 s5, s1
; SI-NEXT: v_cmp_o_f32_e64 s[0:1], s2, s2
-; SI-NEXT: v_cmp_neq_f32_e64 s[2:3], |s3|, v0
-; SI-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
+; SI-NEXT: v_cmp_class_f32_e32 vcc, s3, v0
+; SI-NEXT: s_and_b64 s[0:1], s[0:1], vcc
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
@@ -363,11 +369,11 @@ define amdgpu_kernel void @test_isfinite_not_pattern_2(ptr addrspace(1) nocaptur
; VI-LABEL: test_isfinite_not_pattern_2:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x7f800000
+; VI-NEXT: v_mov_b32_e32 v0, 0x1fb
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cmp_o_f32_e64 s[4:5], s2, s2
-; VI-NEXT: v_cmp_neq_f32_e64 s[2:3], |s3|, v0
-; VI-NEXT: s_and_b64 s[2:3], s[4:5], s[2:3]
+; VI-NEXT: v_cmp_class_f32_e32 vcc, s3, v0
+; VI-NEXT: s_and_b64 s[2:3], s[4:5], vcc
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[2:3]
; VI-NEXT: v_mov_b32_e32 v1, s1
@@ -380,7 +386,7 @@ define amdgpu_kernel void @test_isfinite_not_pattern_2(ptr addrspace(1) nocaptur
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_cmp_o_f32_e64 s2, s2, s2
-; GFX11-NEXT: v_cmp_neq_f32_e64 s3, 0x7f800000, |s3|
+; GFX11-NEXT: v_cmp_class_f32_e64 s3, s3, 0x1fb
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_and_b32 s2, s2, s3
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
@@ -405,11 +411,11 @@ define amdgpu_kernel void @test_isfinite_not_pattern_3(ptr addrspace(1) nocaptur
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
-; SI-NEXT: v_mov_b32_e32 v0, 0x7f800000
+; SI-NEXT: v_mov_b32_e32 v0, 0x1fb
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cmp_u_f32_e64 s[0:1], s2, s2
-; SI-NEXT: v_cmp_neq_f32_e64 s[2:3], |s2|, v0
-; SI-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
+; SI-NEXT: v_cmp_class_f32_e32 vcc, s2, v0
+; SI-NEXT: s_and_b64 s[0:1], s[0:1], vcc
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
@@ -418,11 +424,11 @@ define amdgpu_kernel void @test_isfinite_not_pattern_3(ptr addrspace(1) nocaptur
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x7f800000
+; VI-NEXT: v_mov_b32_e32 v0, 0x1fb
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cmp_u_f32_e64 s[2:3], s4, s4
-; VI-NEXT: v_cmp_neq_f32_e64 s[4:5], |s4|, v0
-; VI-NEXT: s_and_b64 s[2:3], s[2:3], s[4:5]
+; VI-NEXT: v_cmp_class_f32_e32 vcc, s4, v0
+; VI-NEXT: s_and_b64 s[2:3], s[2:3], vcc
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[2:3]
; VI-NEXT: v_mov_b32_e32 v1, s1
@@ -437,7 +443,7 @@ define amdgpu_kernel void @test_isfinite_not_pattern_3(ptr addrspace(1) nocaptur
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_cmp_u_f32_e64 s3, s2, s2
-; GFX11-NEXT: v_cmp_neq_f32_e64 s2, 0x7f800000, |s2|
+; GFX11-NEXT: v_cmp_class_f32_e64 s2, s2, 0x1fb
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_and_b32 s2, s3, s2
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
@@ -458,25 +464,29 @@ define amdgpu_kernel void @test_isfinite_pattern_4(ptr addrspace(1) nocapture %o
; SI-LABEL: test_isfinite_pattern_4:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT: s_load_dword s0, s[0:1], 0xb
+; SI-NEXT: s_load_dword s2, s[0:1], 0xb
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: v_mov_b32_e32 v0, 0x1f8
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_cmp_class_f32_e32 vcc, s0, v0
-; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-NEXT: v_cmp_o_f32_e64 s[0:1], s2, s2
+; SI-NEXT: v_cmp_class_f32_e32 vcc, s2, v0
+; SI-NEXT: s_and_b64 s[0:1], s[0:1], vcc
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: test_isfinite_pattern_4:
; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: v_mov_b32_e32 v0, 0x1f8
; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_class_f32_e32 vcc, s2, v0
+; VI-NEXT: v_cmp_o_f32_e64 s[2:3], s4, s4
+; VI-NEXT: v_cmp_class_f32_e32 vcc, s4, v0
+; VI-NEXT: s_and_b64 s[2:3], s[2:3], vcc
; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[2:3]
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NE...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/81404
More information about the llvm-commits
mailing list