[llvm-branch-commits] [llvm] DAG: Check if is_fpclass is custom, instead of isLegalOrCustom (PR #105577)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Aug 21 13:18:50 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
For some reason, isOperationLegalOrCustom is not the same as
isOperationLegal || isOperationCustom. Unfortunately, it also checks
whether the type is legal, which makes it useless for custom lowering
on non-legal types (which is always ppcf128).
Really, the DAG builder shouldn't be expanding this itself; doing so
makes it difficult to work with. The expansion is only here to work
around the DAG requiring legal integer types the same size as
the FP type after type legalization.
---
Full diff: https://github.com/llvm/llvm-project/pull/105577.diff
5 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+2-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (+11-6)
- (modified) llvm/test/CodeGen/AMDGPU/fract-match.ll (+5-5)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll (+128-77)
- (modified) llvm/test/CodeGen/PowerPC/is_fpclass.ll (+14-23)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 60dcb118542785..09a3def6586493 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7032,7 +7032,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// If ISD::IS_FPCLASS should be expanded, do it right now, because the
// expansion can use illegal types. Making expansion early allows
// legalizing these types prior to selection.
- if (!TLI.isOperationLegalOrCustom(ISD::IS_FPCLASS, ArgVT)) {
+ if (!TLI.isOperationLegal(ISD::IS_FPCLASS, ArgVT) &&
+ !TLI.isOperationCustom(ISD::IS_FPCLASS, ArgVT)) {
SDValue Result = TLI.expandIS_FPCLASS(DestVT, Op, Test, Flags, sdl, DAG);
setValue(&I, Result);
return;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index e57c8f8b7b4835..866e04bcc7fb2d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -426,12 +426,17 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
// FIXME: These IS_FPCLASS vector fp types are marked custom so it reaches
// scalarization code. Can be removed when IS_FPCLASS expand isn't called by
// default unless marked custom/legal.
- setOperationAction(
- ISD::IS_FPCLASS,
- {MVT::v2f16, MVT::v3f16, MVT::v4f16, MVT::v16f16, MVT::v2f32, MVT::v3f32,
- MVT::v4f32, MVT::v5f32, MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32,
- MVT::v2f64, MVT::v3f64, MVT::v4f64, MVT::v8f64, MVT::v16f64},
- Custom);
+ setOperationAction(ISD::IS_FPCLASS,
+ {MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32,
+ MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32,
+ MVT::v2f64, MVT::v3f64, MVT::v4f64, MVT::v8f64,
+ MVT::v16f64},
+ Custom);
+
+ if (isTypeLegal(MVT::f16))
+ setOperationAction(ISD::IS_FPCLASS,
+ {MVT::v2f16, MVT::v3f16, MVT::v4f16, MVT::v16f16},
+ Custom);
// Expand to fneg + fadd.
setOperationAction(ISD::FSUB, MVT::f64, Expand);
diff --git a/llvm/test/CodeGen/AMDGPU/fract-match.ll b/llvm/test/CodeGen/AMDGPU/fract-match.ll
index 1b28ddb2c58620..b212b9caf8400e 100644
--- a/llvm/test/CodeGen/AMDGPU/fract-match.ll
+++ b/llvm/test/CodeGen/AMDGPU/fract-match.ll
@@ -2135,16 +2135,16 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocaptu
; GFX8-LABEL: safe_math_fract_v2f16:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mov_b32_e32 v6, 0x204
+; GFX8-NEXT: s_movk_i32 s6, 0x204
; GFX8-NEXT: v_floor_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT: v_floor_f16_e32 v4, v0
-; GFX8-NEXT: v_cmp_class_f16_sdwa s[4:5], v0, v6 src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_fract_f16_sdwa v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX8-NEXT: v_cmp_class_f16_sdwa s[4:5], v0, s6 src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_pack_b32_f16 v3, v4, v3
; GFX8-NEXT: v_fract_f16_e32 v4, v0
-; GFX8-NEXT: v_fract_f16_sdwa v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX8-NEXT: v_cmp_class_f16_e32 vcc, v0, v6
; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, 0, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX8-NEXT: v_cmp_class_f16_e64 s[4:5], v0, s6
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, s[4:5]
; GFX8-NEXT: v_pack_b32_f16 v0, v0, v5
; GFX8-NEXT: global_store_dword v[1:2], v3, off
; GFX8-NEXT: s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
index 9c248bd6e8b2aa..3d8e9e60973053 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
@@ -959,47 +959,86 @@ define <2 x i1> @isnan_v2f16(<2 x half> %x) nounwind {
; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX8CHECK-LABEL: isnan_v2f16:
-; GFX8CHECK: ; %bb.0:
-; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8CHECK-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3
-; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
-; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v1, 3
-; GFX8CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
-; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
+; GFX8SELDAG-LABEL: isnan_v2f16:
+; GFX8SELDAG: ; %bb.0:
+; GFX8SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8SELDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX8SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX8SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8SELDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9CHECK-LABEL: isnan_v2f16:
-; GFX9CHECK: ; %bb.0:
-; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9CHECK-NEXT: v_mov_b32_e32 v1, 3
-; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3
-; GFX9CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
-; GFX9CHECK-NEXT: v_cmp_class_f16_sdwa s[4:5], v0, v1 src0_sel:WORD_1 src1_sel:DWORD
-; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
-; GFX9CHECK-NEXT: v_mov_b32_e32 v0, v2
-; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
+; GFX8GLISEL-LABEL: isnan_v2f16:
+; GFX8GLISEL: ; %bb.0:
+; GFX8GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8GLISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX8GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3
+; GFX8GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX8GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v1, 3
+; GFX8GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
+; GFX8GLISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10CHECK-LABEL: isnan_v2f16:
-; GFX10CHECK: ; %bb.0:
-; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10CHECK-NEXT: v_mov_b32_e32 v1, 3
-; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s4, v0, 3
-; GFX10CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
-; GFX10CHECK-NEXT: v_cmp_class_f16_sdwa s4, v0, v1 src0_sel:WORD_1 src1_sel:DWORD
-; GFX10CHECK-NEXT: v_mov_b32_e32 v0, v2
-; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4
-; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
+; GFX9SELDAG-LABEL: isnan_v2f16:
+; GFX9SELDAG: ; %bb.0:
+; GFX9SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9SELDAG-NEXT: v_cmp_u_f16_sdwa s[4:5], v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v0, v0
+; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
+; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9SELDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11CHECK-LABEL: isnan_v2f16:
-; GFX11CHECK: ; %bb.0:
-; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 3
-; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v1, 3
-; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
-; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
+; GFX9GLISEL-LABEL: isnan_v2f16:
+; GFX9GLISEL: ; %bb.0:
+; GFX9GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GLISEL-NEXT: v_mov_b32_e32 v1, 3
+; GFX9GLISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3
+; GFX9GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
+; GFX9GLISEL-NEXT: v_cmp_class_f16_sdwa s[4:5], v0, v1 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
+; GFX9GLISEL-NEXT: v_mov_b32_e32 v0, v2
+; GFX9GLISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10SELDAG-LABEL: isnan_v2f16:
+; GFX10SELDAG: ; %bb.0:
+; GFX10SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX10SELDAG-NEXT: v_cmp_u_f16_sdwa s4, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4
+; GFX10SELDAG-NEXT: v_mov_b32_e32 v0, v2
+; GFX10SELDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10GLISEL-LABEL: isnan_v2f16:
+; GFX10GLISEL: ; %bb.0:
+; GFX10GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10GLISEL-NEXT: v_mov_b32_e32 v1, 3
+; GFX10GLISEL-NEXT: v_cmp_class_f16_e64 s4, v0, 3
+; GFX10GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
+; GFX10GLISEL-NEXT: v_cmp_class_f16_sdwa s4, v0, v1 src0_sel:WORD_1 src1_sel:DWORD
+; GFX10GLISEL-NEXT: v_mov_b32_e32 v0, v2
+; GFX10GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4
+; GFX10GLISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-LABEL: isnan_v2f16:
+; GFX11SELDAG: ; %bb.0:
+; GFX11SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX11SELDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-LABEL: isnan_v2f16:
+; GFX11GLISEL: ; %bb.0:
+; GFX11GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX11GLISEL-NEXT: v_cmp_class_f16_e64 s0, v0, 3
+; GFX11GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-NEXT: v_cmp_class_f16_e64 s0, v1, 3
+; GFX11GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
+; GFX11GLISEL-NEXT: s_setpc_b64 s[30:31]
%1 = call <2 x i1> @llvm.is.fpclass.v2f16(<2 x half> %x, i32 3) ; nan
ret <2 x i1> %1
}
@@ -1196,16 +1235,17 @@ define <4 x i1> @isnan_v4f16(<4 x half> %x) nounwind {
; GFX8SELDAG-LABEL: isnan_v4f16:
; GFX8SELDAG: ; %bb.0:
; GFX8SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3
; GFX8SELDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
-; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v1, 3
-; GFX8SELDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v1
-; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
-; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v3, 3
-; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
-; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v4, 3
-; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5]
+; GFX8SELDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX8SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v3, v3
+; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
+; GFX8SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v2, v2
+; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
+; GFX8SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v0, v0
+; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v1, v1
+; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX8SELDAG-NEXT: v_mov_b32_e32 v1, v4
; GFX8SELDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX8GLISEL-LABEL: isnan_v4f16:
@@ -1227,16 +1267,14 @@ define <4 x i1> @isnan_v4f16(<4 x half> %x) nounwind {
; GFX9SELDAG-LABEL: isnan_v4f16:
; GFX9SELDAG: ; %bb.0:
; GFX9SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v0, 3
-; GFX9SELDAG-NEXT: v_mov_b32_e32 v3, 3
-; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5]
-; GFX9SELDAG-NEXT: v_cmp_class_f16_e64 s[4:5], v1, 3
-; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
-; GFX9SELDAG-NEXT: v_cmp_class_f16_sdwa s[4:5], v0, v3 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9SELDAG-NEXT: v_cmp_u_f16_sdwa s[4:5], v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v0, v0
; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
-; GFX9SELDAG-NEXT: v_cmp_class_f16_sdwa s[4:5], v1, v3 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9SELDAG-NEXT: v_cmp_u_f16_sdwa s[4:5], v1, v1 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9SELDAG-NEXT: v_cmp_u_f16_e32 vcc, v1, v1
; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5]
-; GFX9SELDAG-NEXT: v_mov_b32_e32 v0, v5
+; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX9SELDAG-NEXT: v_mov_b32_e32 v1, v4
; GFX9SELDAG-NEXT: s_setpc_b64 s[30:31]
;
@@ -1259,16 +1297,14 @@ define <4 x i1> @isnan_v4f16(<4 x half> %x) nounwind {
; GFX10SELDAG-LABEL: isnan_v4f16:
; GFX10SELDAG: ; %bb.0:
; GFX10SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10SELDAG-NEXT: v_cmp_class_f16_e64 s4, v0, 3
-; GFX10SELDAG-NEXT: v_mov_b32_e32 v3, 3
-; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, s4
-; GFX10SELDAG-NEXT: v_cmp_class_f16_e64 s4, v1, 3
-; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
-; GFX10SELDAG-NEXT: v_cmp_class_f16_sdwa s4, v0, v3 src0_sel:WORD_1 src1_sel:DWORD
-; GFX10SELDAG-NEXT: v_mov_b32_e32 v0, v5
+; GFX10SELDAG-NEXT: v_cmp_u_f16_sdwa s4, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1
+; GFX10SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v0, v0
; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, s4
-; GFX10SELDAG-NEXT: v_cmp_class_f16_sdwa s4, v1, v3 src0_sel:WORD_1 src1_sel:DWORD
+; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX10SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX10SELDAG-NEXT: v_cmp_u_f16_sdwa s4, v1, v1 src0_sel:WORD_1 src1_sel:WORD_1
; GFX10SELDAG-NEXT: v_mov_b32_e32 v1, v4
+; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4
; GFX10SELDAG-NEXT: s_setpc_b64 s[30:31]
;
@@ -1288,20 +1324,35 @@ define <4 x i1> @isnan_v4f16(<4 x half> %x) nounwind {
; GFX10GLISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4
; GFX10GLISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11CHECK-LABEL: isnan_v4f16:
-; GFX11CHECK: ; %bb.0:
-; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v0, 3
-; GFX11CHECK-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX11CHECK-NEXT: v_lshrrev_b32_e32 v4, 16, v1
-; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v1, 3
-; GFX11CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
-; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v3, 3
-; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
-; GFX11CHECK-NEXT: v_cmp_class_f16_e64 s0, v4, 3
-; GFX11CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0
-; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
+; GFX11SELDAG-LABEL: isnan_v4f16:
+; GFX11SELDAG: ; %bb.0:
+; GFX11SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11SELDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v0
+; GFX11SELDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX11SELDAG-NEXT: v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
+; GFX11SELDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-LABEL: isnan_v4f16:
+; GFX11GLISEL: ; %bb.0:
+; GFX11GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-NEXT: v_cmp_class_f16_e64 s0, v0, 3
+; GFX11GLISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX11GLISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v1
+; GFX11GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-NEXT: v_cmp_class_f16_e64 s0, v1, 3
+; GFX11GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX11GLISEL-NEXT: v_cmp_class_f16_e64 s0, v3, 3
+; GFX11GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
+; GFX11GLISEL-NEXT: v_cmp_class_f16_e64 s0, v4, 3
+; GFX11GLISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0
+; GFX11GLISEL-NEXT: s_setpc_b64 s[30:31]
%1 = call <4 x i1> @llvm.is.fpclass.v4f16(<4 x half> %x, i32 3) ; nan
ret <4 x i1> %1
}
diff --git a/llvm/test/CodeGen/PowerPC/is_fpclass.ll b/llvm/test/CodeGen/PowerPC/is_fpclass.ll
index 57f457553a5407..f88e23fe105f76 100644
--- a/llvm/test/CodeGen/PowerPC/is_fpclass.ll
+++ b/llvm/test/CodeGen/PowerPC/is_fpclass.ll
@@ -29,10 +29,10 @@ define i1 @isnan_double(double %x) nounwind {
define i1 @isnan_ppc_fp128(ppc_fp128 %x) nounwind {
; CHECK-LABEL: isnan_ppc_fp128:
; CHECK: # %bb.0:
-; CHECK-NEXT: fcmpu 0, 1, 1
+; CHECK-NEXT: xststdcdp 0, 1, 64
; CHECK-NEXT: li 3, 0
; CHECK-NEXT: li 4, 1
-; CHECK-NEXT: isel 3, 4, 3, 3
+; CHECK-NEXT: iseleq 3, 4, 3
; CHECK-NEXT: blr
%1 = call i1 @llvm.is.fpclass.ppcf128(ppc_fp128 %x, i32 3) ; nan
ret i1 %1
@@ -77,14 +77,10 @@ define i1 @isnan_double_strictfp(double %x) strictfp nounwind {
define i1 @isnan_ppc_fp128_strictfp(ppc_fp128 %x) strictfp nounwind {
; CHECK-LABEL: isnan_ppc_fp128_strictfp:
; CHECK: # %bb.0:
-; CHECK-NEXT: mffprd 3, 1
-; CHECK-NEXT: li 4, 2047
-; CHECK-NEXT: clrldi 3, 3, 1
-; CHECK-NEXT: rldic 4, 4, 52, 1
-; CHECK-NEXT: cmpd 3, 4
+; CHECK-NEXT: xststdcdp 0, 1, 64
; CHECK-NEXT: li 3, 0
; CHECK-NEXT: li 4, 1
-; CHECK-NEXT: iselgt 3, 4, 3
+; CHECK-NEXT: iseleq 3, 4, 3
; CHECK-NEXT: blr
%1 = call i1 @llvm.is.fpclass.ppcf128(ppc_fp128 %x, i32 3) strictfp ; nan
ret i1 %1
@@ -279,15 +275,11 @@ define i1 @issnan_ppc_fp128(ppc_fp128 %x) nounwind {
; CHECK-LABEL: issnan_ppc_fp128:
; CHECK: # %bb.0:
; CHECK-NEXT: mffprd 3, 1
-; CHECK-NEXT: li 4, 4095
-; CHECK-NEXT: clrldi 3, 3, 1
-; CHECK-NEXT: rldic 4, 4, 51, 1
-; CHECK-NEXT: cmpd 3, 4
-; CHECK-NEXT: li 4, 2047
-; CHECK-NEXT: rldic 4, 4, 52, 1
-; CHECK-NEXT: cmpd 1, 3, 4
+; CHECK-NEXT: xststdcdp 1, 1, 64
+; CHECK-NEXT: rldicl 3, 3, 32, 32
+; CHECK-NEXT: andis. 3, 3, 8
; CHECK-NEXT: li 3, 1
-; CHECK-NEXT: crnand 20, 5, 0
+; CHECK-NEXT: crnand 20, 6, 2
; CHECK-NEXT: isel 3, 0, 3, 20
; CHECK-NEXT: blr
%1 = call i1 @llvm.is.fpclass.ppcf128(ppc_fp128 %x, i32 1)
@@ -345,13 +337,12 @@ define i1 @isqnan_ppc_fp128(ppc_fp128 %x) nounwind {
; CHECK-LABEL: isqnan_ppc_fp128:
; CHECK: # %bb.0:
; CHECK-NEXT: mffprd 3, 1
-; CHECK-NEXT: li 4, -17
-; CHECK-NEXT: clrldi 3, 3, 1
-; CHECK-NEXT: rldicl 4, 4, 47, 1
-; CHECK-NEXT: cmpd 3, 4
-; CHECK-NEXT: li 3, 0
-; CHECK-NEXT: li 4, 1
-; CHECK-NEXT: iselgt 3, 4, 3
+; CHECK-NEXT: xststdcdp 1, 1, 64
+; CHECK-NEXT: rldicl 3, 3, 13, 51
+; CHECK-NEXT: andi. 3, 3, 1
+; CHECK-NEXT: li 3, 1
+; CHECK-NEXT: crnand 20, 6, 1
+; CHECK-NEXT: isel 3, 0, 3, 20
; CHECK-NEXT: blr
%1 = call i1 @llvm.is.fpclass.ppcf128(ppc_fp128 %x, i32 2)
ret i1 %1
``````````
</details>
https://github.com/llvm/llvm-project/pull/105577
More information about the llvm-branch-commits
mailing list