[llvm] [CodeGenPrepare] Reverse the canonicalization of isInf/isNanOrInf (PR #81572)

Yingwei Zheng via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 14 01:19:51 PDT 2024


https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/81572

>From 676c1c6dfeda586972da6ff2e420bfb49ac19da0 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Tue, 13 Feb 2024 13:04:12 +0800
Subject: [PATCH 1/4] [CodeGenPrepare] Add pre-commit tests. NFC.

---
 .../CodeGenPrepare/AArch64/fpclass-test.ll    | 146 +++++++++++++
 .../CodeGenPrepare/RISCV/fpclass-test.ll      | 146 +++++++++++++
 .../CodeGenPrepare/X86/fpclass-test.ll        | 194 ++++++++++++++++++
 3 files changed, 486 insertions(+)
 create mode 100644 llvm/test/Transforms/CodeGenPrepare/AArch64/fpclass-test.ll
 create mode 100644 llvm/test/Transforms/CodeGenPrepare/RISCV/fpclass-test.ll
 create mode 100644 llvm/test/Transforms/CodeGenPrepare/X86/fpclass-test.ll

diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/fpclass-test.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/fpclass-test.ll
new file mode 100644
index 00000000000000..436b3679ec860a
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/fpclass-test.ll
@@ -0,0 +1,146 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -codegenprepare -S -mtriple=aarch64 < %s | FileCheck %s
+
+define i1 @test_is_inf_or_nan(double %arg) {
+; CHECK-LABEL: define i1 @test_is_inf_or_nan(
+; CHECK-SAME: double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call double @llvm.fabs.f64(double [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp ueq double [[ABS]], 0x7FF0000000000000
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %abs = tail call double @llvm.fabs.f64(double %arg)
+  %ret = fcmp ueq double %abs, 0x7FF0000000000000
+  ret i1 %ret
+}
+
+define i1 @test_is_not_inf_or_nan(double %arg) {
+; CHECK-LABEL: define i1 @test_is_not_inf_or_nan(
+; CHECK-SAME: double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call double @llvm.fabs.f64(double [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp one double [[ABS]], 0x7FF0000000000000
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %abs = tail call double @llvm.fabs.f64(double %arg)
+  %ret = fcmp one double %abs, 0x7FF0000000000000
+  ret i1 %ret
+}
+
+define i1 @test_is_inf(double %arg) {
+; CHECK-LABEL: define i1 @test_is_inf(
+; CHECK-SAME: double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call double @llvm.fabs.f64(double [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp oeq double [[ABS]], 0x7FF0000000000000
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %abs = tail call double @llvm.fabs.f64(double %arg)
+  %ret = fcmp oeq double %abs, 0x7FF0000000000000
+  ret i1 %ret
+}
+
+define i1 @test_is_not_inf(double %arg) {
+; CHECK-LABEL: define i1 @test_is_not_inf(
+; CHECK-SAME: double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call double @llvm.fabs.f64(double [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp une double [[ABS]], 0x7FF0000000000000
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %abs = tail call double @llvm.fabs.f64(double %arg)
+  %ret = fcmp une double %abs, 0x7FF0000000000000
+  ret i1 %ret
+}
+
+define <vscale x 2 x i1> @test_vec_is_inf_or_nan(<vscale x 2 x double> %arg) {
+; CHECK-LABEL: define <vscale x 2 x i1> @test_vec_is_inf_or_nan(
+; CHECK-SAME: <vscale x 2 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp ueq <vscale x 2 x double> [[ABS]], shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 0x7FF0000000000000, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> [[RET]]
+;
+  %abs = tail call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> %arg)
+  %ret = fcmp ueq <vscale x 2 x double> %abs, splat (double 0x7FF0000000000000)
+  ret <vscale x 2 x i1> %ret
+}
+
+define <vscale x 2 x i1> @test_vec_is_not_inf_or_nan(<vscale x 2 x double> %arg) {
+; CHECK-LABEL: define <vscale x 2 x i1> @test_vec_is_not_inf_or_nan(
+; CHECK-SAME: <vscale x 2 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp one <vscale x 2 x double> [[ABS]], shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 0x7FF0000000000000, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> [[RET]]
+;
+  %abs = tail call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> %arg)
+  %ret = fcmp one <vscale x 2 x double> %abs, splat (double 0x7FF0000000000000)
+  ret <vscale x 2 x i1> %ret
+}
+
+define <vscale x 2 x i1> @test_vec_is_inf(<vscale x 2 x double> %arg) {
+; CHECK-LABEL: define <vscale x 2 x i1> @test_vec_is_inf(
+; CHECK-SAME: <vscale x 2 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp oeq <vscale x 2 x double> [[ABS]], shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 0x7FF0000000000000, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> [[RET]]
+;
+  %abs = tail call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> %arg)
+  %ret = fcmp oeq <vscale x 2 x double> %abs, splat (double 0x7FF0000000000000)
+  ret <vscale x 2 x i1> %ret
+}
+
+define <vscale x 2 x i1> @test_vec_is_not_inf(<vscale x 2 x double> %arg) {
+; CHECK-LABEL: define <vscale x 2 x i1> @test_vec_is_not_inf(
+; CHECK-SAME: <vscale x 2 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp une <vscale x 2 x double> [[ABS]], shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 0x7FF0000000000000, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> [[RET]]
+;
+  %abs = tail call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> %arg)
+  %ret = fcmp une <vscale x 2 x double> %abs, splat (double 0x7FF0000000000000)
+  ret <vscale x 2 x i1> %ret
+}
+
+define i1 @test_fp128_is_inf_or_nan(fp128 %arg) {
+; CHECK-LABEL: define i1 @test_fp128_is_inf_or_nan(
+; CHECK-SAME: fp128 [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call fp128 @llvm.fabs.f128(fp128 [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp ueq fp128 [[ABS]], 0xL00000000000000007FFF000000000000
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+  %ret = fcmp ueq fp128 %abs, 0xL00000000000000007FFF000000000000
+  ret i1 %ret
+}
+
+define i1 @test_fp128_is_not_inf_or_nan(fp128 %arg) {
+; CHECK-LABEL: define i1 @test_fp128_is_not_inf_or_nan(
+; CHECK-SAME: fp128 [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call fp128 @llvm.fabs.f128(fp128 [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp one fp128 [[ABS]], 0xL00000000000000007FFF000000000000
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+  %ret = fcmp one fp128 %abs, 0xL00000000000000007FFF000000000000
+  ret i1 %ret
+}
+
+define i1 @test_fp128_is_inf(fp128 %arg) {
+; CHECK-LABEL: define i1 @test_fp128_is_inf(
+; CHECK-SAME: fp128 [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call fp128 @llvm.fabs.f128(fp128 [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp oeq fp128 [[ABS]], 0xL00000000000000007FFF000000000000
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+  %ret = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000
+  ret i1 %ret
+}
+
+define i1 @test_fp128_is_not_inf(fp128 %arg) {
+; CHECK-LABEL: define i1 @test_fp128_is_not_inf(
+; CHECK-SAME: fp128 [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call fp128 @llvm.fabs.f128(fp128 [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp une fp128 [[ABS]], 0xL00000000000000007FFF000000000000
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+  %ret = fcmp une fp128 %abs, 0xL00000000000000007FFF000000000000
+  ret i1 %ret
+}
diff --git a/llvm/test/Transforms/CodeGenPrepare/RISCV/fpclass-test.ll b/llvm/test/Transforms/CodeGenPrepare/RISCV/fpclass-test.ll
new file mode 100644
index 00000000000000..95047da2637868
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/RISCV/fpclass-test.ll
@@ -0,0 +1,146 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -codegenprepare -S -mtriple=riscv64 < %s | FileCheck %s
+
+define i1 @test_is_inf_or_nan(double %arg) {
+; CHECK-LABEL: define i1 @test_is_inf_or_nan(
+; CHECK-SAME: double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call double @llvm.fabs.f64(double [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp ueq double [[ABS]], 0x7FF0000000000000
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %abs = tail call double @llvm.fabs.f64(double %arg)
+  %ret = fcmp ueq double %abs, 0x7FF0000000000000
+  ret i1 %ret
+}
+
+define i1 @test_is_not_inf_or_nan(double %arg) {
+; CHECK-LABEL: define i1 @test_is_not_inf_or_nan(
+; CHECK-SAME: double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call double @llvm.fabs.f64(double [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp one double [[ABS]], 0x7FF0000000000000
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %abs = tail call double @llvm.fabs.f64(double %arg)
+  %ret = fcmp one double %abs, 0x7FF0000000000000
+  ret i1 %ret
+}
+
+define i1 @test_is_inf(double %arg) {
+; CHECK-LABEL: define i1 @test_is_inf(
+; CHECK-SAME: double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call double @llvm.fabs.f64(double [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp oeq double [[ABS]], 0x7FF0000000000000
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %abs = tail call double @llvm.fabs.f64(double %arg)
+  %ret = fcmp oeq double %abs, 0x7FF0000000000000
+  ret i1 %ret
+}
+
+define i1 @test_is_not_inf(double %arg) {
+; CHECK-LABEL: define i1 @test_is_not_inf(
+; CHECK-SAME: double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call double @llvm.fabs.f64(double [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp une double [[ABS]], 0x7FF0000000000000
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %abs = tail call double @llvm.fabs.f64(double %arg)
+  %ret = fcmp une double %abs, 0x7FF0000000000000
+  ret i1 %ret
+}
+
+define <vscale x 4 x i1> @test_vec_is_inf_or_nan(<vscale x 4 x double> %arg) {
+; CHECK-LABEL: define <vscale x 4 x i1> @test_vec_is_inf_or_nan(
+; CHECK-SAME: <vscale x 4 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp ueq <vscale x 4 x double> [[ABS]], shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 0x7FF0000000000000, i64 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 4 x i1> [[RET]]
+;
+  %abs = tail call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> %arg)
+  %ret = fcmp ueq <vscale x 4 x double> %abs, splat (double 0x7FF0000000000000)
+  ret <vscale x 4 x i1> %ret
+}
+
+define <vscale x 4 x i1> @test_vec_is_not_inf_or_nan(<vscale x 4 x double> %arg) {
+; CHECK-LABEL: define <vscale x 4 x i1> @test_vec_is_not_inf_or_nan(
+; CHECK-SAME: <vscale x 4 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp one <vscale x 4 x double> [[ABS]], shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 0x7FF0000000000000, i64 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 4 x i1> [[RET]]
+;
+  %abs = tail call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> %arg)
+  %ret = fcmp one <vscale x 4 x double> %abs, splat (double 0x7FF0000000000000)
+  ret <vscale x 4 x i1> %ret
+}
+
+define <vscale x 4 x i1> @test_vec_is_inf(<vscale x 4 x double> %arg) {
+; CHECK-LABEL: define <vscale x 4 x i1> @test_vec_is_inf(
+; CHECK-SAME: <vscale x 4 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp oeq <vscale x 4 x double> [[ABS]], shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 0x7FF0000000000000, i64 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 4 x i1> [[RET]]
+;
+  %abs = tail call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> %arg)
+  %ret = fcmp oeq <vscale x 4 x double> %abs, splat (double 0x7FF0000000000000)
+  ret <vscale x 4 x i1> %ret
+}
+
+define <vscale x 4 x i1> @test_vec_is_not_inf(<vscale x 4 x double> %arg) {
+; CHECK-LABEL: define <vscale x 4 x i1> @test_vec_is_not_inf(
+; CHECK-SAME: <vscale x 4 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp une <vscale x 4 x double> [[ABS]], shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 0x7FF0000000000000, i64 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 4 x i1> [[RET]]
+;
+  %abs = tail call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> %arg)
+  %ret = fcmp une <vscale x 4 x double> %abs, splat (double 0x7FF0000000000000)
+  ret <vscale x 4 x i1> %ret
+}
+
+define i1 @test_fp128_is_inf_or_nan(fp128 %arg) {
+; CHECK-LABEL: define i1 @test_fp128_is_inf_or_nan(
+; CHECK-SAME: fp128 [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call fp128 @llvm.fabs.f128(fp128 [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp ueq fp128 [[ABS]], 0xL00000000000000007FFF000000000000
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+  %ret = fcmp ueq fp128 %abs, 0xL00000000000000007FFF000000000000
+  ret i1 %ret
+}
+
+define i1 @test_fp128_is_not_inf_or_nan(fp128 %arg) {
+; CHECK-LABEL: define i1 @test_fp128_is_not_inf_or_nan(
+; CHECK-SAME: fp128 [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call fp128 @llvm.fabs.f128(fp128 [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp one fp128 [[ABS]], 0xL00000000000000007FFF000000000000
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+  %ret = fcmp one fp128 %abs, 0xL00000000000000007FFF000000000000
+  ret i1 %ret
+}
+
+define i1 @test_fp128_is_inf(fp128 %arg) {
+; CHECK-LABEL: define i1 @test_fp128_is_inf(
+; CHECK-SAME: fp128 [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call fp128 @llvm.fabs.f128(fp128 [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp oeq fp128 [[ABS]], 0xL00000000000000007FFF000000000000
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+  %ret = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000
+  ret i1 %ret
+}
+
+define i1 @test_fp128_is_not_inf(fp128 %arg) {
+; CHECK-LABEL: define i1 @test_fp128_is_not_inf(
+; CHECK-SAME: fp128 [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call fp128 @llvm.fabs.f128(fp128 [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp une fp128 [[ABS]], 0xL00000000000000007FFF000000000000
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+  %ret = fcmp une fp128 %abs, 0xL00000000000000007FFF000000000000
+  ret i1 %ret
+}
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/fpclass-test.ll b/llvm/test/Transforms/CodeGenPrepare/X86/fpclass-test.ll
new file mode 100644
index 00000000000000..8b924f1640e3a4
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/fpclass-test.ll
@@ -0,0 +1,194 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -codegenprepare -S -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
+
+define i1 @test_is_inf_or_nan(double %arg) {
+; CHECK-LABEL: define i1 @test_is_inf_or_nan(
+; CHECK-SAME: double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call double @llvm.fabs.f64(double [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp ueq double [[ABS]], 0x7FF0000000000000
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %abs = tail call double @llvm.fabs.f64(double %arg)
+  %ret = fcmp ueq double %abs, 0x7FF0000000000000
+  ret i1 %ret
+}
+
+define i1 @test_is_not_inf_or_nan(double %arg) {
+; CHECK-LABEL: define i1 @test_is_not_inf_or_nan(
+; CHECK-SAME: double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call double @llvm.fabs.f64(double [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp one double [[ABS]], 0x7FF0000000000000
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %abs = tail call double @llvm.fabs.f64(double %arg)
+  %ret = fcmp one double %abs, 0x7FF0000000000000
+  ret i1 %ret
+}
+
+define i1 @test_is_inf(double %arg) {
+; CHECK-LABEL: define i1 @test_is_inf(
+; CHECK-SAME: double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call double @llvm.fabs.f64(double [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp oeq double [[ABS]], 0x7FF0000000000000
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %abs = tail call double @llvm.fabs.f64(double %arg)
+  %ret = fcmp oeq double %abs, 0x7FF0000000000000
+  ret i1 %ret
+}
+
+define i1 @test_is_not_inf(double %arg) {
+; CHECK-LABEL: define i1 @test_is_not_inf(
+; CHECK-SAME: double [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call double @llvm.fabs.f64(double [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp une double [[ABS]], 0x7FF0000000000000
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %abs = tail call double @llvm.fabs.f64(double %arg)
+  %ret = fcmp une double %abs, 0x7FF0000000000000
+  ret i1 %ret
+}
+
+define <4 x i1> @test_vec_is_inf_or_nan(<4 x double> %arg) {
+; CHECK-LABEL: define <4 x i1> @test_vec_is_inf_or_nan(
+; CHECK-SAME: <4 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp ueq <4 x double> [[ABS]], <double 0x7FF0000000000000, double 0x7FF0000000000000, double 0x7FF0000000000000, double 0x7FF0000000000000>
+; CHECK-NEXT:    ret <4 x i1> [[RET]]
+;
+  %abs = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> %arg)
+  %ret = fcmp ueq <4 x double> %abs, splat (double 0x7FF0000000000000)
+  ret <4 x i1> %ret
+}
+
+define <4 x i1> @test_vec_is_not_inf_or_nan(<4 x double> %arg) {
+; CHECK-LABEL: define <4 x i1> @test_vec_is_not_inf_or_nan(
+; CHECK-SAME: <4 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp one <4 x double> [[ABS]], <double 0x7FF0000000000000, double 0x7FF0000000000000, double 0x7FF0000000000000, double 0x7FF0000000000000>
+; CHECK-NEXT:    ret <4 x i1> [[RET]]
+;
+  %abs = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> %arg)
+  %ret = fcmp one <4 x double> %abs, splat (double 0x7FF0000000000000)
+  ret <4 x i1> %ret
+}
+
+define <4 x i1> @test_vec_is_inf(<4 x double> %arg) {
+; CHECK-LABEL: define <4 x i1> @test_vec_is_inf(
+; CHECK-SAME: <4 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp oeq <4 x double> [[ABS]], <double 0x7FF0000000000000, double 0x7FF0000000000000, double 0x7FF0000000000000, double 0x7FF0000000000000>
+; CHECK-NEXT:    ret <4 x i1> [[RET]]
+;
+  %abs = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> %arg)
+  %ret = fcmp oeq <4 x double> %abs, splat (double 0x7FF0000000000000)
+  ret <4 x i1> %ret
+}
+
+define <4 x i1> @test_vec_is_not_inf(<4 x double> %arg) {
+; CHECK-LABEL: define <4 x i1> @test_vec_is_not_inf(
+; CHECK-SAME: <4 x double> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp une <4 x double> [[ABS]], <double 0x7FF0000000000000, double 0x7FF0000000000000, double 0x7FF0000000000000, double 0x7FF0000000000000>
+; CHECK-NEXT:    ret <4 x i1> [[RET]]
+;
+  %abs = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> %arg)
+  %ret = fcmp une <4 x double> %abs, splat (double 0x7FF0000000000000)
+  ret <4 x i1> %ret
+}
+
+define i1 @test_fp128_is_inf_or_nan(fp128 %arg) {
+; CHECK-LABEL: define i1 @test_fp128_is_inf_or_nan(
+; CHECK-SAME: fp128 [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call fp128 @llvm.fabs.f128(fp128 [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp ueq fp128 [[ABS]], 0xL00000000000000007FFF000000000000
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+  %ret = fcmp ueq fp128 %abs, 0xL00000000000000007FFF000000000000
+  ret i1 %ret
+}
+
+define i1 @test_fp128_is_not_inf_or_nan(fp128 %arg) {
+; CHECK-LABEL: define i1 @test_fp128_is_not_inf_or_nan(
+; CHECK-SAME: fp128 [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call fp128 @llvm.fabs.f128(fp128 [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp one fp128 [[ABS]], 0xL00000000000000007FFF000000000000
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+  %ret = fcmp one fp128 %abs, 0xL00000000000000007FFF000000000000
+  ret i1 %ret
+}
+
+define i1 @test_fp128_is_inf(fp128 %arg) {
+; CHECK-LABEL: define i1 @test_fp128_is_inf(
+; CHECK-SAME: fp128 [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call fp128 @llvm.fabs.f128(fp128 [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp oeq fp128 [[ABS]], 0xL00000000000000007FFF000000000000
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+  %ret = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000
+  ret i1 %ret
+}
+
+define i1 @test_fp128_is_not_inf(fp128 %arg) {
+; CHECK-LABEL: define i1 @test_fp128_is_not_inf(
+; CHECK-SAME: fp128 [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call fp128 @llvm.fabs.f128(fp128 [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp une fp128 [[ABS]], 0xL00000000000000007FFF000000000000
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+  %ret = fcmp une fp128 %abs, 0xL00000000000000007FFF000000000000
+  ret i1 %ret
+}
+
+define i1 @test_x86_fp80_is_inf_or_nan(x86_fp80 %arg) {
+; CHECK-LABEL: define i1 @test_x86_fp80_is_inf_or_nan(
+; CHECK-SAME: x86_fp80 [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp ueq x86_fp80 [[ABS]], 0xK7FFF8000000000000000
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %abs = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 %arg)
+  %ret = fcmp ueq x86_fp80 %abs, 0xK7FFF8000000000000000
+  ret i1 %ret
+}
+
+define i1 @test_x86_fp80_is_not_inf_or_nan(x86_fp80 %arg) {
+; CHECK-LABEL: define i1 @test_x86_fp80_is_not_inf_or_nan(
+; CHECK-SAME: x86_fp80 [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp one x86_fp80 [[ABS]], 0xK7FFF8000000000000000
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %abs = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 %arg)
+  %ret = fcmp one x86_fp80 %abs, 0xK7FFF8000000000000000
+  ret i1 %ret
+}
+
+define i1 @test_x86_fp80_is_inf(x86_fp80 %arg) {
+; CHECK-LABEL: define i1 @test_x86_fp80_is_inf(
+; CHECK-SAME: x86_fp80 [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp oeq x86_fp80 [[ABS]], 0xK7FFF8000000000000000
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %abs = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 %arg)
+  %ret = fcmp oeq x86_fp80 %abs, 0xK7FFF8000000000000000
+  ret i1 %ret
+}
+
+define i1 @test_x86_fp80_is_not_inf(x86_fp80 %arg) {
+; CHECK-LABEL: define i1 @test_x86_fp80_is_not_inf(
+; CHECK-SAME: x86_fp80 [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ABS:%.*]] = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 [[ARG]])
+; CHECK-NEXT:    [[RET:%.*]] = fcmp une x86_fp80 [[ABS]], 0xK7FFF8000000000000000
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %abs = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 %arg)
+  %ret = fcmp une x86_fp80 %abs, 0xK7FFF8000000000000000
+  ret i1 %ret
+}

>From 7be98788eae7385360e53a1eabddba220abbd10b Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Tue, 13 Feb 2024 13:31:32 +0800
Subject: [PATCH 2/4] [CodeGenPrepare] Reverse the canonicalization of
 isInf/isNanOrInf

---
 llvm/lib/CodeGen/CodeGenPrepare.cpp           |  27 ++
 llvm/test/CodeGen/AArch64/isinf.ll            |  22 +-
 llvm/test/CodeGen/AMDGPU/fp-classify.ll       | 196 +++++++------
 llvm/test/CodeGen/AMDGPU/fract-match.ll       | 259 +++++++++---------
 .../CodeGenPrepare/AArch64/fpclass-test.ll    |  60 ++--
 .../CodeGenPrepare/RISCV/fpclass-test.ll      |  60 ++--
 .../CodeGenPrepare/X86/fpclass-test.ll        |  80 +++---
 7 files changed, 358 insertions(+), 346 deletions(-)

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 09c4922d8822cc..33e4aedb392d75 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1943,6 +1943,30 @@ static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp) {
   return false;
 }
 
+static bool foldFCmpToFPClassTest(CmpInst *Cmp) {
+  FCmpInst *FCmp = dyn_cast<FCmpInst>(Cmp);
+  if (!FCmp)
+    return false;
+
+  // Reverse the canonicalization only for isInf/isNanOrInf (or negated) tests.
+  auto ShouldReverseTransform = [](FPClassTest ClassTest) {
+    return ClassTest == fcInf || ClassTest == (fcInf | fcNan);
+  };
+  auto [ClassVal, ClassTest] =
+      fcmpToClassTest(FCmp->getPredicate(), *FCmp->getParent()->getParent(),
+                      FCmp->getOperand(0), FCmp->getOperand(1));
+  if (ClassVal && (ShouldReverseTransform(ClassTest) ||
+                   ShouldReverseTransform(~ClassTest))) {
+    IRBuilder<> Builder(Cmp);
+    Value *IsFPClass = Builder.createIsFPClass(ClassVal, ClassTest);
+    Cmp->replaceAllUsesWith(IsFPClass);
+    RecursivelyDeleteTriviallyDeadInstructions(Cmp);
+    return true;
+  }
+
+  return false;
+}
+
 bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
   if (sinkCmpExpression(Cmp, *TLI))
     return true;
@@ -1959,6 +1983,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
   if (swapICmpOperandsToExposeCSEOpportunities(Cmp))
     return true;
 
+  if (foldFCmpToFPClassTest(Cmp))
+    return true;
+
   return false;
 }
 
diff --git a/llvm/test/CodeGen/AArch64/isinf.ll b/llvm/test/CodeGen/AArch64/isinf.ll
index 458bd7eeba16cf..834417b98743a8 100644
--- a/llvm/test/CodeGen/AArch64/isinf.ll
+++ b/llvm/test/CodeGen/AArch64/isinf.ll
@@ -58,22 +58,14 @@ define i32 @replace_isinf_call_f64(double %x) {
 define i32 @replace_isinf_call_f128(fp128 %x) {
 ; CHECK-LABEL: replace_isinf_call_f128:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #32
-; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    str q0, [sp]
-; CHECK-NEXT:    ldrb w8, [sp, #15]
-; CHECK-NEXT:    and w8, w8, #0x7f
-; CHECK-NEXT:    strb w8, [sp, #15]
-; CHECK-NEXT:    adrp x8, .LCPI3_0
-; CHECK-NEXT:    ldr q0, [sp]
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI3_0]
-; CHECK-NEXT:    bl __eqtf2
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    str q0, [sp, #-16]!
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    ldp x9, x8, [sp], #16
+; CHECK-NEXT:    and x8, x8, #0x7fffffffffffffff
+; CHECK-NEXT:    eor x8, x8, #0x7fff000000000000
+; CHECK-NEXT:    orr x8, x9, x8
+; CHECK-NEXT:    cmp x8, #0
 ; CHECK-NEXT:    cset w0, eq
-; CHECK-NEXT:    add sp, sp, #32
 ; CHECK-NEXT:    ret
   %abs = tail call fp128 @llvm.fabs.f128(fp128 %x)
   %cmpinf = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000
diff --git a/llvm/test/CodeGen/AMDGPU/fp-classify.ll b/llvm/test/CodeGen/AMDGPU/fp-classify.ll
index 6fa7df913812a3..ed9ce4d62383b1 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-classify.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp-classify.ll
@@ -61,10 +61,10 @@ define amdgpu_kernel void @test_not_isinf_pattern_0(ptr addrspace(1) nocapture %
 ; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
 ; SI-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-NEXT:    s_mov_b32 s6, -1
-; SI-NEXT:    v_mov_b32_e32 v0, 0x7f800000
+; SI-NEXT:    v_mov_b32_e32 v0, 0x207
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    v_cmp_nlg_f32_e64 s[0:1], |s0|, v0
-; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; SI-NEXT:    v_cmp_class_f32_e32 vcc, s0, v0
+; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; SI-NEXT:    s_endpgm
 ;
@@ -72,11 +72,11 @@ define amdgpu_kernel void @test_not_isinf_pattern_0(ptr addrspace(1) nocapture %
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x7f800000
+; VI-NEXT:    v_mov_b32_e32 v0, 0x207
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_nlg_f32_e64 s[2:3], |s2|, v0
+; VI-NEXT:    v_cmp_class_f32_e32 vcc, s2, v0
 ; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[2:3]
+; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-NEXT:    flat_store_dword v[0:1], v2
 ; VI-NEXT:    s_endpgm
@@ -88,7 +88,7 @@ define amdgpu_kernel void @test_not_isinf_pattern_0(ptr addrspace(1) nocapture %
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_nlg_f32_e64 s2, 0x7f800000, |s2|
+; GFX11-NEXT:    v_cmp_class_f32_e64 s2, s2, 0x207
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
 ; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
@@ -143,25 +143,29 @@ define amdgpu_kernel void @test_isfinite_pattern_0(ptr addrspace(1) nocapture %o
 ; SI-LABEL: test_isfinite_pattern_0:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
 ; SI-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-NEXT:    s_mov_b32 s6, -1
-; SI-NEXT:    v_mov_b32_e32 v0, 0x1f8
+; SI-NEXT:    v_mov_b32_e32 v0, 0x1fb
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    v_cmp_class_f32_e32 vcc, s0, v0
-; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-NEXT:    v_cmp_o_f32_e64 s[0:1], s2, s2
+; SI-NEXT:    v_cmp_class_f32_e32 vcc, s2, v0
+; SI-NEXT:    s_and_b64 s[0:1], s[0:1], vcc
+; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
 ; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: test_isfinite_pattern_0:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT:    s_load_dword s4, s[0:1], 0x2c
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x1f8
+; VI-NEXT:    v_mov_b32_e32 v0, 0x1fb
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_class_f32_e32 vcc, s2, v0
+; VI-NEXT:    v_cmp_o_f32_e64 s[2:3], s4, s4
+; VI-NEXT:    v_cmp_class_f32_e32 vcc, s4, v0
+; VI-NEXT:    s_and_b64 s[2:3], s[2:3], vcc
 ; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[2:3]
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-NEXT:    flat_store_dword v[0:1], v2
 ; VI-NEXT:    s_endpgm
@@ -173,8 +177,10 @@ define amdgpu_kernel void @test_isfinite_pattern_0(ptr addrspace(1) nocapture %o
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_class_f32_e64 s2, s2, 0x1f8
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_o_f32_e64 s3, s2, s2
+; GFX11-NEXT:    v_cmp_class_f32_e64 s2, s2, 0x1fb
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT:    s_and_b32 s2, s3, s2
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
 ; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX11-NEXT:    s_nop 0
@@ -349,13 +355,13 @@ define amdgpu_kernel void @test_isfinite_not_pattern_2(ptr addrspace(1) nocaptur
 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
 ; SI-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-NEXT:    s_mov_b32 s6, -1
-; SI-NEXT:    v_mov_b32_e32 v0, 0x7f800000
+; SI-NEXT:    v_mov_b32_e32 v0, 0x1fb
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_mov_b32 s4, s0
 ; SI-NEXT:    s_mov_b32 s5, s1
 ; SI-NEXT:    v_cmp_o_f32_e64 s[0:1], s2, s2
-; SI-NEXT:    v_cmp_neq_f32_e64 s[2:3], |s3|, v0
-; SI-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
+; SI-NEXT:    v_cmp_class_f32_e32 vcc, s3, v0
+; SI-NEXT:    s_and_b64 s[0:1], s[0:1], vcc
 ; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
 ; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; SI-NEXT:    s_endpgm
@@ -363,11 +369,11 @@ define amdgpu_kernel void @test_isfinite_not_pattern_2(ptr addrspace(1) nocaptur
 ; VI-LABEL: test_isfinite_not_pattern_2:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x7f800000
+; VI-NEXT:    v_mov_b32_e32 v0, 0x1fb
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-NEXT:    v_cmp_o_f32_e64 s[4:5], s2, s2
-; VI-NEXT:    v_cmp_neq_f32_e64 s[2:3], |s3|, v0
-; VI-NEXT:    s_and_b64 s[2:3], s[4:5], s[2:3]
+; VI-NEXT:    v_cmp_class_f32_e32 vcc, s3, v0
+; VI-NEXT:    s_and_b64 s[2:3], s[4:5], vcc
 ; VI-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[2:3]
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
@@ -380,7 +386,7 @@ define amdgpu_kernel void @test_isfinite_not_pattern_2(ptr addrspace(1) nocaptur
 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    v_cmp_o_f32_e64 s2, s2, s2
-; GFX11-NEXT:    v_cmp_neq_f32_e64 s3, 0x7f800000, |s3|
+; GFX11-NEXT:    v_cmp_class_f32_e64 s3, s3, 0x1fb
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX11-NEXT:    s_and_b32 s2, s2, s3
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
@@ -405,11 +411,11 @@ define amdgpu_kernel void @test_isfinite_not_pattern_3(ptr addrspace(1) nocaptur
 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
 ; SI-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-NEXT:    s_mov_b32 s6, -1
-; SI-NEXT:    v_mov_b32_e32 v0, 0x7f800000
+; SI-NEXT:    v_mov_b32_e32 v0, 0x1fb
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    v_cmp_u_f32_e64 s[0:1], s2, s2
-; SI-NEXT:    v_cmp_neq_f32_e64 s[2:3], |s2|, v0
-; SI-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
+; SI-NEXT:    v_cmp_class_f32_e32 vcc, s2, v0
+; SI-NEXT:    s_and_b64 s[0:1], s[0:1], vcc
 ; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
 ; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; SI-NEXT:    s_endpgm
@@ -418,11 +424,11 @@ define amdgpu_kernel void @test_isfinite_not_pattern_3(ptr addrspace(1) nocaptur
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dword s4, s[0:1], 0x2c
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x7f800000
+; VI-NEXT:    v_mov_b32_e32 v0, 0x1fb
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-NEXT:    v_cmp_u_f32_e64 s[2:3], s4, s4
-; VI-NEXT:    v_cmp_neq_f32_e64 s[4:5], |s4|, v0
-; VI-NEXT:    s_and_b64 s[2:3], s[2:3], s[4:5]
+; VI-NEXT:    v_cmp_class_f32_e32 vcc, s4, v0
+; VI-NEXT:    s_and_b64 s[2:3], s[2:3], vcc
 ; VI-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[2:3]
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
@@ -437,7 +443,7 @@ define amdgpu_kernel void @test_isfinite_not_pattern_3(ptr addrspace(1) nocaptur
 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    v_cmp_u_f32_e64 s3, s2, s2
-; GFX11-NEXT:    v_cmp_neq_f32_e64 s2, 0x7f800000, |s2|
+; GFX11-NEXT:    v_cmp_class_f32_e64 s2, s2, 0x1fb
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX11-NEXT:    s_and_b32 s2, s3, s2
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
@@ -458,25 +464,29 @@ define amdgpu_kernel void @test_isfinite_pattern_4(ptr addrspace(1) nocapture %o
 ; SI-LABEL: test_isfinite_pattern_4:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
 ; SI-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-NEXT:    s_mov_b32 s6, -1
 ; SI-NEXT:    v_mov_b32_e32 v0, 0x1f8
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    v_cmp_class_f32_e32 vcc, s0, v0
-; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-NEXT:    v_cmp_o_f32_e64 s[0:1], s2, s2
+; SI-NEXT:    v_cmp_class_f32_e32 vcc, s2, v0
+; SI-NEXT:    s_and_b64 s[0:1], s[0:1], vcc
+; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
 ; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: test_isfinite_pattern_4:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT:    s_load_dword s4, s[0:1], 0x2c
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; VI-NEXT:    v_mov_b32_e32 v0, 0x1f8
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_class_f32_e32 vcc, s2, v0
+; VI-NEXT:    v_cmp_o_f32_e64 s[2:3], s4, s4
+; VI-NEXT:    v_cmp_class_f32_e32 vcc, s4, v0
+; VI-NEXT:    s_and_b64 s[2:3], s[2:3], vcc
 ; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[2:3]
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-NEXT:    flat_store_dword v[0:1], v2
 ; VI-NEXT:    s_endpgm
@@ -488,8 +498,10 @@ define amdgpu_kernel void @test_isfinite_pattern_4(ptr addrspace(1) nocapture %o
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_o_f32_e64 s3, s2, s2
 ; GFX11-NEXT:    v_cmp_class_f32_e64 s2, s2, 0x1f8
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT:    s_and_b32 s2, s3, s2
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
 ; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX11-NEXT:    s_nop 0
@@ -508,25 +520,29 @@ define amdgpu_kernel void @test_isfinite_pattern_4_commute_and(ptr addrspace(1)
 ; SI-LABEL: test_isfinite_pattern_4_commute_and:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
 ; SI-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-NEXT:    s_mov_b32 s6, -1
 ; SI-NEXT:    v_mov_b32_e32 v0, 0x1f8
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    v_cmp_class_f32_e32 vcc, s0, v0
-; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-NEXT:    v_cmp_o_f32_e64 s[0:1], s2, s2
+; SI-NEXT:    v_cmp_class_f32_e32 vcc, s2, v0
+; SI-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
+; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
 ; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: test_isfinite_pattern_4_commute_and:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT:    s_load_dword s4, s[0:1], 0x2c
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; VI-NEXT:    v_mov_b32_e32 v0, 0x1f8
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_class_f32_e32 vcc, s2, v0
+; VI-NEXT:    v_cmp_o_f32_e64 s[2:3], s4, s4
+; VI-NEXT:    v_cmp_class_f32_e32 vcc, s4, v0
+; VI-NEXT:    s_and_b64 s[2:3], vcc, s[2:3]
 ; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[2:3]
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-NEXT:    flat_store_dword v[0:1], v2
 ; VI-NEXT:    s_endpgm
@@ -538,8 +554,10 @@ define amdgpu_kernel void @test_isfinite_pattern_4_commute_and(ptr addrspace(1)
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_o_f32_e64 s3, s2, s2
 ; GFX11-NEXT:    v_cmp_class_f32_e64 s2, s2, 0x1f8
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT:    s_and_b32 s2, s2, s3
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
 ; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX11-NEXT:    s_nop 0
@@ -618,16 +636,16 @@ define amdgpu_kernel void @test_not_isfinite_pattern_4_wrong_ord_test(ptr addrsp
 define amdgpu_kernel void @test_isinf_pattern_f16(ptr addrspace(1) nocapture %out, half %x) #0 {
 ; SI-LABEL: test_isinf_pattern_f16:
 ; SI:       ; %bb.0:
-; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
-; SI-NEXT:    s_mov_b32 s7, 0xf000
-; SI-NEXT:    s_mov_b32 s6, -1
-; SI-NEXT:    s_mov_b32 s1, 0x7f800000
+; SI-NEXT:    s_load_dword s4, s[0:1], 0xb
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    v_cvt_f32_f16_e64 v0, |s0|
-; SI-NEXT:    v_cmp_eq_f32_e32 vcc, s1, v0
-; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT:    s_and_b32 s4, s4, 0x7fff
+; SI-NEXT:    s_cmpk_eq_i32 s4, 0x7c00
+; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
+; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: test_isinf_pattern_f16:
@@ -667,27 +685,32 @@ define amdgpu_kernel void @test_isinf_pattern_f16(ptr addrspace(1) nocapture %ou
 define amdgpu_kernel void @test_isfinite_pattern_0_f16(ptr addrspace(1) nocapture %out, half %x) #0 {
 ; SI-LABEL: test_isfinite_pattern_0_f16:
 ; SI:       ; %bb.0:
-; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
-; SI-NEXT:    s_mov_b32 s7, 0xf000
-; SI-NEXT:    s_mov_b32 s6, -1
-; SI-NEXT:    s_movk_i32 s1, 0x1f8
+; SI-NEXT:    s_load_dword s4, s[0:1], 0xb
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    v_cvt_f32_f16_e32 v0, s0
-; SI-NEXT:    v_cmp_class_f32_e64 s[0:1], v0, s1
-; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT:    v_cvt_f32_f16_e32 v0, s4
+; SI-NEXT:    s_and_b32 s4, s4, 0x7fff
+; SI-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; SI-NEXT:    s_cmpk_lg_i32 s4, 0x7c00
+; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
+; SI-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
+; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: test_isfinite_pattern_0_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT:    s_load_dword s4, s[0:1], 0x2c
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x1f8
+; VI-NEXT:    v_mov_b32_e32 v0, 0x1fb
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_class_f16_e32 vcc, s2, v0
+; VI-NEXT:    v_cmp_o_f16_e64 s[2:3], s4, s4
+; VI-NEXT:    v_cmp_class_f16_e32 vcc, s4, v0
+; VI-NEXT:    s_and_b64 s[2:3], s[2:3], vcc
 ; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[2:3]
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-NEXT:    flat_store_dword v[0:1], v2
 ; VI-NEXT:    s_endpgm
@@ -699,8 +722,10 @@ define amdgpu_kernel void @test_isfinite_pattern_0_f16(ptr addrspace(1) nocaptur
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_class_f16_e64 s2, s2, 0x1f8
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_cmp_o_f16_e64 s3, s2, s2
+; GFX11-NEXT:    v_cmp_class_f16_e64 s2, s2, 0x1fb
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT:    s_and_b32 s2, s3, s2
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
 ; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX11-NEXT:    s_nop 0
@@ -718,27 +743,32 @@ define amdgpu_kernel void @test_isfinite_pattern_0_f16(ptr addrspace(1) nocaptur
 define amdgpu_kernel void @test_isfinite_pattern_4_f16(ptr addrspace(1) nocapture %out, half %x) #0 {
 ; SI-LABEL: test_isfinite_pattern_4_f16:
 ; SI:       ; %bb.0:
-; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
-; SI-NEXT:    s_mov_b32 s7, 0xf000
-; SI-NEXT:    s_mov_b32 s6, -1
-; SI-NEXT:    s_movk_i32 s1, 0x1f8
+; SI-NEXT:    s_load_dword s4, s[0:1], 0xb
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    v_cvt_f32_f16_e32 v0, s0
-; SI-NEXT:    v_cmp_class_f32_e64 s[0:1], v0, s1
-; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT:    v_cvt_f32_f16_e32 v0, s4
+; SI-NEXT:    s_and_b32 s4, s4, 0x7fff
+; SI-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
+; SI-NEXT:    s_cmpk_lt_i32 s4, 0x7c00
+; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
+; SI-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
+; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: test_isfinite_pattern_4_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT:    s_load_dword s4, s[0:1], 0x2c
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; VI-NEXT:    v_mov_b32_e32 v0, 0x1f8
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_class_f16_e32 vcc, s2, v0
+; VI-NEXT:    v_cmp_o_f16_e64 s[2:3], s4, s4
+; VI-NEXT:    v_cmp_class_f16_e32 vcc, s4, v0
+; VI-NEXT:    s_and_b64 s[2:3], s[2:3], vcc
 ; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[2:3]
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-NEXT:    flat_store_dword v[0:1], v2
 ; VI-NEXT:    s_endpgm
@@ -750,8 +780,10 @@ define amdgpu_kernel void @test_isfinite_pattern_4_f16(ptr addrspace(1) nocaptur
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_cmp_o_f16_e64 s3, s2, s2
 ; GFX11-NEXT:    v_cmp_class_f16_e64 s2, s2, 0x1f8
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT:    s_and_b32 s2, s3, s2
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
 ; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX11-NEXT:    s_nop 0
diff --git a/llvm/test/CodeGen/AMDGPU/fract-match.ll b/llvm/test/CodeGen/AMDGPU/fract-match.ll
index 3a0b8259d08496..d65dff7bc44627 100644
--- a/llvm/test/CodeGen/AMDGPU/fract-match.ll
+++ b/llvm/test/CodeGen/AMDGPU/fract-match.ll
@@ -53,14 +53,14 @@ define float @safe_math_fract_f32(float %x, ptr addrspace(1) nocapture writeonly
 ; GFX6-NEXT:    v_sub_f32_e32 v4, v0, v3
 ; GFX6-NEXT:    v_min_f32_e32 v4, 0x3f7fffff, v4
 ; GFX6-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
-; GFX6-NEXT:    s_mov_b32 s8, 0x7f800000
+; GFX6-NEXT:    v_mov_b32_e32 v5, 0x204
 ; GFX6-NEXT:    s_mov_b32 s6, 0
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
-; GFX6-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s8
+; GFX6-NEXT:    v_cmp_class_f32_e32 vcc, v0, v5
 ; GFX6-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX6-NEXT:    s_mov_b32 s4, s6
 ; GFX6-NEXT:    s_mov_b32 s5, s6
-; GFX6-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX6-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
 ; GFX6-NEXT:    buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
@@ -68,15 +68,15 @@ define float @safe_math_fract_f32(float %x, ptr addrspace(1) nocapture writeonly
 ; GFX7-LABEL: safe_math_fract_f32:
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    s_mov_b32 s8, 0x7f800000
+; GFX7-NEXT:    v_mov_b32_e32 v5, 0x204
 ; GFX7-NEXT:    s_mov_b32 s6, 0
 ; GFX7-NEXT:    v_fract_f32_e32 v4, v0
-; GFX7-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s8
+; GFX7-NEXT:    v_cmp_class_f32_e32 vcc, v0, v5
 ; GFX7-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX7-NEXT:    s_mov_b32 s4, s6
 ; GFX7-NEXT:    s_mov_b32 s5, s6
 ; GFX7-NEXT:    v_floor_f32_e32 v3, v0
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
 ; GFX7-NEXT:    buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
@@ -84,11 +84,11 @@ define float @safe_math_fract_f32(float %x, ptr addrspace(1) nocapture writeonly
 ; GFX8-LABEL: safe_math_fract_f32:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_mov_b32 s4, 0x7f800000
+; GFX8-NEXT:    v_mov_b32_e32 v5, 0x204
 ; GFX8-NEXT:    v_fract_f32_e32 v4, v0
-; GFX8-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s4
+; GFX8-NEXT:    v_cmp_class_f32_e32 vcc, v0, v5
 ; GFX8-NEXT:    v_floor_f32_e32 v3, v0
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
 ; GFX8-NEXT:    global_store_dword v[1:2], v3, off
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
@@ -97,10 +97,10 @@ define float @safe_math_fract_f32(float %x, ptr addrspace(1) nocapture writeonly
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_fract_f32_e32 v3, v0
-; GFX11-NEXT:    v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX11-NEXT:    v_cmp_class_f32_e64 s0, v0, 0x204
 ; GFX11-NEXT:    v_floor_f32_e32 v4, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT:    v_cndmask_b32_e64 v0, v3, 0, s0
 ; GFX11-NEXT:    global_store_b32 v[1:2], v4, off
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 entry:
@@ -210,14 +210,14 @@ define float @no_nan_check_math_fract_f32(float %x, ptr addrspace(1) nocapture w
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_floor_f32_e32 v3, v0
 ; GFX6-NEXT:    v_sub_f32_e32 v4, v0, v3
-; GFX6-NEXT:    s_mov_b32 s8, 0x7f800000
+; GFX6-NEXT:    v_mov_b32_e32 v5, 0x204
 ; GFX6-NEXT:    s_mov_b32 s6, 0
 ; GFX6-NEXT:    v_min_f32_e32 v4, 0x3f7fffff, v4
-; GFX6-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s8
+; GFX6-NEXT:    v_cmp_class_f32_e32 vcc, v0, v5
 ; GFX6-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX6-NEXT:    s_mov_b32 s4, s6
 ; GFX6-NEXT:    s_mov_b32 s5, s6
-; GFX6-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX6-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
 ; GFX6-NEXT:    buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
@@ -227,14 +227,14 @@ define float @no_nan_check_math_fract_f32(float %x, ptr addrspace(1) nocapture w
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_floor_f32_e32 v3, v0
 ; GFX7-NEXT:    v_sub_f32_e32 v4, v0, v3
-; GFX7-NEXT:    s_mov_b32 s8, 0x7f800000
+; GFX7-NEXT:    v_mov_b32_e32 v5, 0x204
 ; GFX7-NEXT:    s_mov_b32 s6, 0
 ; GFX7-NEXT:    v_min_f32_e32 v4, 0x3f7fffff, v4
-; GFX7-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s8
+; GFX7-NEXT:    v_cmp_class_f32_e32 vcc, v0, v5
 ; GFX7-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX7-NEXT:    s_mov_b32 s4, s6
 ; GFX7-NEXT:    s_mov_b32 s5, s6
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
 ; GFX7-NEXT:    buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
@@ -244,10 +244,10 @@ define float @no_nan_check_math_fract_f32(float %x, ptr addrspace(1) nocapture w
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_floor_f32_e32 v3, v0
 ; GFX8-NEXT:    v_sub_f32_e32 v4, v0, v3
-; GFX8-NEXT:    s_mov_b32 s4, 0x7f800000
+; GFX8-NEXT:    v_mov_b32_e32 v5, 0x204
 ; GFX8-NEXT:    v_min_f32_e32 v4, 0x3f7fffff, v4
-; GFX8-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s4
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX8-NEXT:    v_cmp_class_f32_e32 vcc, v0, v5
+; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
 ; GFX8-NEXT:    global_store_dword v[1:2], v3, off
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
@@ -256,12 +256,12 @@ define float @no_nan_check_math_fract_f32(float %x, ptr addrspace(1) nocapture w
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_floor_f32_e32 v3, v0
-; GFX11-NEXT:    v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX11-NEXT:    v_cmp_class_f32_e64 s0, v0, 0x204
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_sub_f32_e32 v4, v0, v3
 ; GFX11-NEXT:    global_store_b32 v[1:2], v3, off
 ; GFX11-NEXT:    v_min_f32_e32 v4, 0x3f7fffff, v4
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v0, v4, 0, s0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %floor = tail call float @llvm.floor.f32(float %x)
@@ -1705,16 +1705,16 @@ define <2 x float> @safe_math_fract_v2f32(<2 x float> %x, ptr addrspace(1) nocap
 ; GFX6-NEXT:    v_min_f32_e32 v7, 0x3f7fffff, v7
 ; GFX6-NEXT:    v_cndmask_b32_e32 v6, v6, v1, vcc
 ; GFX6-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
-; GFX6-NEXT:    s_movk_i32 s10, 0x204
+; GFX6-NEXT:    v_mov_b32_e32 v8, 0x204
 ; GFX6-NEXT:    v_cndmask_b32_e32 v7, v7, v0, vcc
-; GFX6-NEXT:    v_cmp_class_f32_e64 s[8:9], v0, s10
+; GFX6-NEXT:    v_cmp_class_f32_e32 vcc, v0, v8
 ; GFX6-NEXT:    s_mov_b32 s6, 0
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, v7, 0, s[8:9]
-; GFX6-NEXT:    v_cmp_class_f32_e64 s[8:9], v1, s10
+; GFX6-NEXT:    v_cndmask_b32_e64 v0, v7, 0, vcc
+; GFX6-NEXT:    v_cmp_class_f32_e32 vcc, v1, v8
 ; GFX6-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX6-NEXT:    s_mov_b32 s4, s6
 ; GFX6-NEXT:    s_mov_b32 s5, s6
-; GFX6-NEXT:    v_cndmask_b32_e64 v1, v6, 0, s[8:9]
+; GFX6-NEXT:    v_cndmask_b32_e64 v1, v6, 0, vcc
 ; GFX6-NEXT:    buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
@@ -1722,19 +1722,19 @@ define <2 x float> @safe_math_fract_v2f32(<2 x float> %x, ptr addrspace(1) nocap
 ; GFX7-LABEL: safe_math_fract_v2f32:
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    s_mov_b32 s8, 0x7f800000
+; GFX7-NEXT:    v_mov_b32_e32 v8, 0x204
 ; GFX7-NEXT:    v_fract_f32_e32 v6, v0
-; GFX7-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s8
+; GFX7-NEXT:    v_cmp_class_f32_e32 vcc, v0, v8
 ; GFX7-NEXT:    s_mov_b32 s6, 0
 ; GFX7-NEXT:    v_floor_f32_e32 v4, v0
 ; GFX7-NEXT:    v_fract_f32_e32 v7, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
-; GFX7-NEXT:    v_cmp_neq_f32_e64 vcc, |v1|, s8
+; GFX7-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX7-NEXT:    v_cmp_class_f32_e32 vcc, v1, v8
 ; GFX7-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX7-NEXT:    s_mov_b32 s4, s6
 ; GFX7-NEXT:    s_mov_b32 s5, s6
 ; GFX7-NEXT:    v_floor_f32_e32 v5, v1
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, 0, v7, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v1, v7, 0, vcc
 ; GFX7-NEXT:    buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
@@ -1742,15 +1742,15 @@ define <2 x float> @safe_math_fract_v2f32(<2 x float> %x, ptr addrspace(1) nocap
 ; GFX8-LABEL: safe_math_fract_v2f32:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_mov_b32 s4, 0x7f800000
+; GFX8-NEXT:    v_mov_b32_e32 v8, 0x204
 ; GFX8-NEXT:    v_fract_f32_e32 v6, v0
-; GFX8-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s4
+; GFX8-NEXT:    v_cmp_class_f32_e32 vcc, v0, v8
 ; GFX8-NEXT:    v_floor_f32_e32 v4, v0
 ; GFX8-NEXT:    v_fract_f32_e32 v7, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
-; GFX8-NEXT:    v_cmp_neq_f32_e64 vcc, |v1|, s4
+; GFX8-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX8-NEXT:    v_cmp_class_f32_e32 vcc, v1, v8
 ; GFX8-NEXT:    v_floor_f32_e32 v5, v1
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, 0, v7, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v1, v7, 0, vcc
 ; GFX8-NEXT:    global_store_dwordx2 v[2:3], v[4:5], off
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
@@ -1759,14 +1759,15 @@ define <2 x float> @safe_math_fract_v2f32(<2 x float> %x, ptr addrspace(1) nocap
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_fract_f32_e32 v6, v0
-; GFX11-NEXT:    v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX11-NEXT:    v_cmp_class_f32_e64 s0, v0, 0x204
 ; GFX11-NEXT:    v_fract_f32_e32 v7, v1
 ; GFX11-NEXT:    v_floor_f32_e32 v4, v0
 ; GFX11-NEXT:    v_floor_f32_e32 v5, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc_lo
-; GFX11-NEXT:    v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v1|
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
+; GFX11-NEXT:    v_cndmask_b32_e64 v0, v6, 0, s0
+; GFX11-NEXT:    v_cmp_class_f32_e64 s0, v1, 0x204
 ; GFX11-NEXT:    global_store_b64 v[2:3], v[4:5], off
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0, v7, vcc_lo
+; GFX11-NEXT:    v_cndmask_b32_e64 v1, v7, 0, s0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %floor = tail call <2 x float> @llvm.floor.v2f32(<2 x float> %x)
@@ -1823,17 +1824,16 @@ define double @safe_math_fract_f64(double %x, ptr addrspace(1) nocapture writeon
 ; GFX6-NEXT:    v_add_f64 v[6:7], v[0:1], -v[4:5]
 ; GFX6-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX6-NEXT:    v_min_f64 v[6:7], v[6:7], s[8:9]
-; GFX6-NEXT:    s_mov_b32 s8, 0
-; GFX6-NEXT:    s_mov_b32 s9, 0x7ff00000
+; GFX6-NEXT:    v_mov_b32_e32 v8, 0x204
 ; GFX6-NEXT:    v_cndmask_b32_e32 v7, v7, v1, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc
-; GFX6-NEXT:    v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[8:9]
+; GFX6-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
 ; GFX6-NEXT:    s_mov_b32 s6, 0
 ; GFX6-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX6-NEXT:    s_mov_b32 s4, s6
 ; GFX6-NEXT:    s_mov_b32 s5, s6
-; GFX6-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
-; GFX6-NEXT:    v_cndmask_b32_e32 v1, 0, v7, vcc
+; GFX6-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX6-NEXT:    v_cndmask_b32_e64 v1, v7, 0, vcc
 ; GFX6-NEXT:    buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
@@ -1841,17 +1841,16 @@ define double @safe_math_fract_f64(double %x, ptr addrspace(1) nocapture writeon
 ; GFX7-LABEL: safe_math_fract_f64:
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    s_mov_b32 s4, 0
-; GFX7-NEXT:    s_mov_b32 s5, 0x7ff00000
+; GFX7-NEXT:    v_mov_b32_e32 v6, 0x204
 ; GFX7-NEXT:    v_fract_f64_e32 v[4:5], v[0:1]
-; GFX7-NEXT:    v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[4:5]
+; GFX7-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v6
 ; GFX7-NEXT:    v_floor_f64_e32 v[6:7], v[0:1]
 ; GFX7-NEXT:    s_mov_b32 s6, 0
 ; GFX7-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX7-NEXT:    s_mov_b32 s4, s6
 ; GFX7-NEXT:    s_mov_b32 s5, s6
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
-; GFX7-NEXT:    v_cndmask_b32_e32 v1, 0, v5, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v1, v5, 0, vcc
 ; GFX7-NEXT:    buffer_store_dwordx2 v[6:7], v[2:3], s[4:7], 0 addr64
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
@@ -1859,13 +1858,12 @@ define double @safe_math_fract_f64(double %x, ptr addrspace(1) nocapture writeon
 ; GFX8-LABEL: safe_math_fract_f64:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_mov_b32 s4, 0
-; GFX8-NEXT:    s_mov_b32 s5, 0x7ff00000
+; GFX8-NEXT:    v_mov_b32_e32 v6, 0x204
 ; GFX8-NEXT:    v_fract_f64_e32 v[4:5], v[0:1]
-; GFX8-NEXT:    v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[4:5]
+; GFX8-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v6
 ; GFX8-NEXT:    v_floor_f64_e32 v[6:7], v[0:1]
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
-; GFX8-NEXT:    v_cndmask_b32_e32 v1, 0, v5, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v1, v5, 0, vcc
 ; GFX8-NEXT:    global_store_dwordx2 v[2:3], v[6:7], off
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
@@ -1874,10 +1872,11 @@ define double @safe_math_fract_f64(double %x, ptr addrspace(1) nocapture writeon
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_fract_f64_e32 v[4:5], v[0:1]
-; GFX11-NEXT:    v_cmp_neq_f64_e64 vcc_lo, 0x7ff00000, |v[0:1]|
+; GFX11-NEXT:    v_cmp_class_f64_e64 s0, v[0:1], 0x204
 ; GFX11-NEXT:    v_floor_f64_e32 v[6:7], v[0:1]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT:    v_dual_cndmask_b32 v0, 0, v4 :: v_dual_cndmask_b32 v1, 0, v5
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cndmask_b32_e64 v0, v4, 0, s0
+; GFX11-NEXT:    v_cndmask_b32_e64 v1, v5, 0, s0
 ; GFX11-NEXT:    global_store_b64 v[2:3], v[6:7], off
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 entry:
@@ -1937,21 +1936,22 @@ define half @safe_math_fract_f16(half %x, ptr addrspace(1) nocapture writeonly %
 ; GFX6:       ; %bb.0: ; %entry
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX6-NEXT:    s_mov_b32 s8, 0x7f800000
+; GFX6-NEXT:    s_movk_i32 s8, 0x7c00
 ; GFX6-NEXT:    s_mov_b32 s6, 0
 ; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v0
+; GFX6-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
 ; GFX6-NEXT:    s_mov_b32 s4, s6
 ; GFX6-NEXT:    s_mov_b32 s5, s6
-; GFX6-NEXT:    v_floor_f32_e32 v3, v0
-; GFX6-NEXT:    v_sub_f32_e32 v4, v0, v3
-; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
-; GFX6-NEXT:    v_min_f32_e32 v4, 0x3f7fe000, v4
-; GFX6-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
-; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
-; GFX6-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s8
-; GFX6-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
-; GFX6-NEXT:    buffer_store_short v3, v[1:2], s[4:7], 0 addr64
+; GFX6-NEXT:    v_floor_f32_e32 v4, v3
+; GFX6-NEXT:    v_sub_f32_e32 v5, v3, v4
+; GFX6-NEXT:    v_cvt_f16_f32_e32 v4, v4
+; GFX6-NEXT:    v_min_f32_e32 v5, 0x3f7fe000, v5
+; GFX6-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX6-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, s8, v0
+; GFX6-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc
+; GFX6-NEXT:    buffer_store_short v4, v[1:2], s[4:7], 0 addr64
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1959,32 +1959,33 @@ define half @safe_math_fract_f16(half %x, ptr addrspace(1) nocapture writeonly %
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT:    s_mov_b32 s8, 0x7f800000
+; GFX7-NEXT:    s_movk_i32 s8, 0x7c00
 ; GFX7-NEXT:    s_mov_b32 s6, 0
 ; GFX7-NEXT:    s_mov_b32 s7, 0xf000
-; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v0
+; GFX7-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
 ; GFX7-NEXT:    s_mov_b32 s4, s6
 ; GFX7-NEXT:    s_mov_b32 s5, s6
-; GFX7-NEXT:    v_floor_f32_e32 v3, v0
-; GFX7-NEXT:    v_sub_f32_e32 v4, v0, v3
-; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT:    v_min_f32_e32 v4, 0x3f7fe000, v4
-; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
-; GFX7-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
-; GFX7-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s8
-; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
-; GFX7-NEXT:    buffer_store_short v3, v[1:2], s[4:7], 0 addr64
+; GFX7-NEXT:    v_floor_f32_e32 v4, v3
+; GFX7-NEXT:    v_sub_f32_e32 v5, v3, v4
+; GFX7-NEXT:    v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT:    v_min_f32_e32 v5, 0x3f7fe000, v5
+; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, s8, v0
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc
+; GFX7-NEXT:    buffer_store_short v4, v[1:2], s[4:7], 0 addr64
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: safe_math_fract_f16:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_movk_i32 s4, 0x7c00
+; GFX8-NEXT:    v_mov_b32_e32 v5, 0x204
 ; GFX8-NEXT:    v_fract_f16_e32 v4, v0
-; GFX8-NEXT:    v_cmp_neq_f16_e64 vcc, |v0|, s4
+; GFX8-NEXT:    v_cmp_class_f16_e32 vcc, v0, v5
 ; GFX8-NEXT:    v_floor_f16_e32 v3, v0
-; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
 ; GFX8-NEXT:    global_store_short v[1:2], v3, off
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
@@ -1993,10 +1994,10 @@ define half @safe_math_fract_f16(half %x, ptr addrspace(1) nocapture writeonly %
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_fract_f16_e32 v3, v0
-; GFX11-NEXT:    v_cmp_neq_f16_e64 vcc_lo, 0x7c00, |v0|
+; GFX11-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x204
 ; GFX11-NEXT:    v_floor_f16_e32 v4, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT:    v_cndmask_b32_e64 v0, v3, 0, s0
 ; GFX11-NEXT:    global_store_b16 v[1:2], v4, off
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 entry:
@@ -2062,12 +2063,12 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocaptu
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX6-NEXT:    s_mov_b32 s8, 0x7f800000
+; GFX6-NEXT:    s_movk_i32 s8, 0x7c00
 ; GFX6-NEXT:    s_mov_b32 s6, 0
 ; GFX6-NEXT:    v_cvt_f32_f16_e32 v4, v1
 ; GFX6-NEXT:    v_cvt_f32_f16_e32 v5, v0
-; GFX6-NEXT:    v_cvt_f32_f16_e64 v0, |v0|
-; GFX6-NEXT:    v_cvt_f32_f16_e64 v1, |v1|
+; GFX6-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX6-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
 ; GFX6-NEXT:    v_floor_f32_e32 v6, v4
 ; GFX6-NEXT:    v_cvt_f16_f32_e32 v7, v6
 ; GFX6-NEXT:    v_floor_f32_e32 v8, v5
@@ -2080,10 +2081,10 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocaptu
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
 ; GFX6-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
 ; GFX6-NEXT:    v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX6-NEXT:    v_cmp_neq_f32_e32 vcc, s8, v0
+; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, s8, v0
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, 0, v5, vcc
-; GFX6-NEXT:    v_cmp_neq_f32_e32 vcc, s8, v1
+; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, s8, v1
 ; GFX6-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX6-NEXT:    s_mov_b32 s4, s6
 ; GFX6-NEXT:    s_mov_b32 s5, s6
@@ -2098,12 +2099,12 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocaptu
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT:    s_mov_b32 s8, 0x7f800000
+; GFX7-NEXT:    s_movk_i32 s8, 0x7c00
 ; GFX7-NEXT:    s_mov_b32 s6, 0
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v4, v1
 ; GFX7-NEXT:    v_cvt_f32_f16_e32 v5, v0
-; GFX7-NEXT:    v_cvt_f32_f16_e64 v0, |v0|
-; GFX7-NEXT:    v_cvt_f32_f16_e64 v1, |v1|
+; GFX7-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX7-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
 ; GFX7-NEXT:    v_floor_f32_e32 v6, v4
 ; GFX7-NEXT:    v_cvt_f16_f32_e32 v7, v6
 ; GFX7-NEXT:    v_floor_f32_e32 v8, v5
@@ -2116,10 +2117,10 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocaptu
 ; GFX7-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
 ; GFX7-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
 ; GFX7-NEXT:    v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX7-NEXT:    v_cmp_neq_f32_e32 vcc, s8, v0
+; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, s8, v0
 ; GFX7-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
 ; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v5, vcc
-; GFX7-NEXT:    v_cmp_neq_f32_e32 vcc, s8, v1
+; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, s8, v1
 ; GFX7-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX7-NEXT:    s_mov_b32 s4, s6
 ; GFX7-NEXT:    s_mov_b32 s5, s6
@@ -2133,16 +2134,16 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocaptu
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
-; GFX8-NEXT:    s_movk_i32 s6, 0x204
+; GFX8-NEXT:    v_mov_b32_e32 v7, 0x204
 ; GFX8-NEXT:    v_floor_f16_e32 v4, v3
 ; GFX8-NEXT:    v_floor_f16_e32 v5, v0
 ; GFX8-NEXT:    v_fract_f16_e32 v6, v3
-; GFX8-NEXT:    v_cmp_class_f16_e64 s[4:5], v3, s6
+; GFX8-NEXT:    v_cmp_class_f16_e32 vcc, v3, v7
 ; GFX8-NEXT:    v_pack_b32_f16 v4, v5, v4
 ; GFX8-NEXT:    v_fract_f16_e32 v5, v0
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v6, 0, s[4:5]
-; GFX8-NEXT:    v_cmp_class_f16_e64 s[4:5], v0, s6
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v5, 0, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v6, 0, vcc
+; GFX8-NEXT:    v_cmp_class_f16_e32 vcc, v0, v7
+; GFX8-NEXT:    v_cndmask_b32_e64 v0, v5, 0, vcc
 ; GFX8-NEXT:    v_pack_b32_f16 v0, v0, v3
 ; GFX8-NEXT:    global_store_dword v[1:2], v4, off
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
@@ -2237,19 +2238,19 @@ define <2 x double> @safe_math_fract_v2f64(<2 x double> %x, ptr addrspace(1) noc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v11, v11, v3, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v10, v10, v2, vcc
 ; GFX6-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
-; GFX6-NEXT:    s_movk_i32 s10, 0x204
-; GFX6-NEXT:    v_cmp_class_f64_e64 s[8:9], v[0:1], s10
+; GFX6-NEXT:    v_mov_b32_e32 v14, 0x204
 ; GFX6-NEXT:    v_cndmask_b32_e32 v13, v13, v1, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v12, v12, v0, vcc
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, v12, 0, s[8:9]
-; GFX6-NEXT:    v_cndmask_b32_e64 v1, v13, 0, s[8:9]
-; GFX6-NEXT:    v_cmp_class_f64_e64 s[8:9], v[2:3], s10
+; GFX6-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v14
 ; GFX6-NEXT:    s_mov_b32 s6, 0
+; GFX6-NEXT:    v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX6-NEXT:    v_cndmask_b32_e64 v1, v13, 0, vcc
+; GFX6-NEXT:    v_cmp_class_f64_e32 vcc, v[2:3], v14
 ; GFX6-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX6-NEXT:    s_mov_b32 s4, s6
 ; GFX6-NEXT:    s_mov_b32 s5, s6
-; GFX6-NEXT:    v_cndmask_b32_e64 v2, v10, 0, s[8:9]
-; GFX6-NEXT:    v_cndmask_b32_e64 v3, v11, 0, s[8:9]
+; GFX6-NEXT:    v_cndmask_b32_e64 v2, v10, 0, vcc
+; GFX6-NEXT:    v_cndmask_b32_e64 v3, v11, 0, vcc
 ; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[4:7], 0 addr64
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
@@ -2257,39 +2258,39 @@ define <2 x double> @safe_math_fract_v2f64(<2 x double> %x, ptr addrspace(1) noc
 ; GFX7-LABEL: safe_math_fract_v2f64:
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    s_movk_i32 s4, 0x204
+; GFX7-NEXT:    v_mov_b32_e32 v6, 0x204
 ; GFX7-NEXT:    v_fract_f64_e32 v[10:11], v[0:1]
-; GFX7-NEXT:    v_cmp_class_f64_e64 s[8:9], v[0:1], s4
+; GFX7-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v6
 ; GFX7-NEXT:    v_fract_f64_e32 v[12:13], v[2:3]
-; GFX7-NEXT:    v_cmp_class_f64_e64 s[10:11], v[2:3], s4
+; GFX7-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v6
 ; GFX7-NEXT:    v_floor_f64_e32 v[8:9], v[2:3]
 ; GFX7-NEXT:    v_floor_f64_e32 v[6:7], v[0:1]
-; GFX7-NEXT:    s_mov_b32 s6, 0
-; GFX7-NEXT:    s_mov_b32 s7, 0xf000
-; GFX7-NEXT:    s_mov_b32 s4, s6
-; GFX7-NEXT:    s_mov_b32 s5, s6
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v10, 0, s[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e64 v1, v11, 0, s[8:9]
-; GFX7-NEXT:    v_cndmask_b32_e64 v2, v12, 0, s[10:11]
-; GFX7-NEXT:    v_cndmask_b32_e64 v3, v13, 0, s[10:11]
-; GFX7-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[4:7], 0 addr64
+; GFX7-NEXT:    s_mov_b32 s10, 0
+; GFX7-NEXT:    s_mov_b32 s11, 0xf000
+; GFX7-NEXT:    s_mov_b32 s8, s10
+; GFX7-NEXT:    s_mov_b32 s9, s10
+; GFX7-NEXT:    v_cndmask_b32_e64 v0, v10, 0, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v1, v11, 0, vcc
+; GFX7-NEXT:    v_cndmask_b32_e64 v2, v12, 0, s[4:5]
+; GFX7-NEXT:    v_cndmask_b32_e64 v3, v13, 0, s[4:5]
+; GFX7-NEXT:    buffer_store_dwordx4 v[6:9], v[4:5], s[8:11], 0 addr64
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: safe_math_fract_v2f64:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_movk_i32 s6, 0x204
+; GFX8-NEXT:    v_mov_b32_e32 v6, 0x204
 ; GFX8-NEXT:    v_fract_f64_e32 v[10:11], v[0:1]
-; GFX8-NEXT:    v_cmp_class_f64_e64 s[4:5], v[0:1], s6
+; GFX8-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v6
 ; GFX8-NEXT:    v_fract_f64_e32 v[12:13], v[2:3]
-; GFX8-NEXT:    v_cmp_class_f64_e64 s[6:7], v[2:3], s6
+; GFX8-NEXT:    v_cmp_class_f64_e64 s[4:5], v[2:3], v6
 ; GFX8-NEXT:    v_floor_f64_e32 v[8:9], v[2:3]
 ; GFX8-NEXT:    v_floor_f64_e32 v[6:7], v[0:1]
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v10, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v1, v11, 0, s[4:5]
-; GFX8-NEXT:    v_cndmask_b32_e64 v2, v12, 0, s[6:7]
-; GFX8-NEXT:    v_cndmask_b32_e64 v3, v13, 0, s[6:7]
+; GFX8-NEXT:    v_cndmask_b32_e64 v0, v10, 0, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v1, v11, 0, vcc
+; GFX8-NEXT:    v_cndmask_b32_e64 v2, v12, 0, s[4:5]
+; GFX8-NEXT:    v_cndmask_b32_e64 v3, v13, 0, s[4:5]
 ; GFX8-NEXT:    global_store_dwordx4 v[4:5], v[6:9], off
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/fpclass-test.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/fpclass-test.ll
index 436b3679ec860a..63ab22e96ad2ad 100644
--- a/llvm/test/Transforms/CodeGenPrepare/AArch64/fpclass-test.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/fpclass-test.ll
@@ -4,9 +4,8 @@
 define i1 @test_is_inf_or_nan(double %arg) {
 ; CHECK-LABEL: define i1 @test_is_inf_or_nan(
 ; CHECK-SAME: double [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call double @llvm.fabs.f64(double [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp ueq double [[ABS]], 0x7FF0000000000000
-; CHECK-NEXT:    ret i1 [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[ARG]], i32 519)
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %abs = tail call double @llvm.fabs.f64(double %arg)
   %ret = fcmp ueq double %abs, 0x7FF0000000000000
@@ -16,9 +15,8 @@ define i1 @test_is_inf_or_nan(double %arg) {
 define i1 @test_is_not_inf_or_nan(double %arg) {
 ; CHECK-LABEL: define i1 @test_is_not_inf_or_nan(
 ; CHECK-SAME: double [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call double @llvm.fabs.f64(double [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp one double [[ABS]], 0x7FF0000000000000
-; CHECK-NEXT:    ret i1 [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[ARG]], i32 504)
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %abs = tail call double @llvm.fabs.f64(double %arg)
   %ret = fcmp one double %abs, 0x7FF0000000000000
@@ -28,9 +26,8 @@ define i1 @test_is_not_inf_or_nan(double %arg) {
 define i1 @test_is_inf(double %arg) {
 ; CHECK-LABEL: define i1 @test_is_inf(
 ; CHECK-SAME: double [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call double @llvm.fabs.f64(double [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp oeq double [[ABS]], 0x7FF0000000000000
-; CHECK-NEXT:    ret i1 [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[ARG]], i32 516)
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %abs = tail call double @llvm.fabs.f64(double %arg)
   %ret = fcmp oeq double %abs, 0x7FF0000000000000
@@ -40,9 +37,8 @@ define i1 @test_is_inf(double %arg) {
 define i1 @test_is_not_inf(double %arg) {
 ; CHECK-LABEL: define i1 @test_is_not_inf(
 ; CHECK-SAME: double [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call double @llvm.fabs.f64(double [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp une double [[ABS]], 0x7FF0000000000000
-; CHECK-NEXT:    ret i1 [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[ARG]], i32 507)
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %abs = tail call double @llvm.fabs.f64(double %arg)
   %ret = fcmp une double %abs, 0x7FF0000000000000
@@ -52,9 +48,8 @@ define i1 @test_is_not_inf(double %arg) {
 define <vscale x 2 x i1> @test_vec_is_inf_or_nan(<vscale x 2 x double> %arg) {
 ; CHECK-LABEL: define <vscale x 2 x i1> @test_vec_is_inf_or_nan(
 ; CHECK-SAME: <vscale x 2 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp ueq <vscale x 2 x double> [[ABS]], shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 0x7FF0000000000000, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT:    ret <vscale x 2 x i1> [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.is.fpclass.nxv2f64(<vscale x 2 x double> [[ARG]], i32 519)
+; CHECK-NEXT:    ret <vscale x 2 x i1> [[TMP1]]
 ;
   %abs = tail call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> %arg)
   %ret = fcmp ueq <vscale x 2 x double> %abs, splat (double 0x7FF0000000000000)
@@ -64,9 +59,8 @@ define <vscale x 2 x i1> @test_vec_is_inf_or_nan(<vscale x 2 x double> %arg) {
 define <vscale x 2 x i1> @test_vec_is_not_inf_or_nan(<vscale x 2 x double> %arg) {
 ; CHECK-LABEL: define <vscale x 2 x i1> @test_vec_is_not_inf_or_nan(
 ; CHECK-SAME: <vscale x 2 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp one <vscale x 2 x double> [[ABS]], shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 0x7FF0000000000000, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT:    ret <vscale x 2 x i1> [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.is.fpclass.nxv2f64(<vscale x 2 x double> [[ARG]], i32 504)
+; CHECK-NEXT:    ret <vscale x 2 x i1> [[TMP1]]
 ;
   %abs = tail call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> %arg)
   %ret = fcmp one <vscale x 2 x double> %abs, splat (double 0x7FF0000000000000)
@@ -76,9 +70,8 @@ define <vscale x 2 x i1> @test_vec_is_not_inf_or_nan(<vscale x 2 x double> %arg)
 define <vscale x 2 x i1> @test_vec_is_inf(<vscale x 2 x double> %arg) {
 ; CHECK-LABEL: define <vscale x 2 x i1> @test_vec_is_inf(
 ; CHECK-SAME: <vscale x 2 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp oeq <vscale x 2 x double> [[ABS]], shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 0x7FF0000000000000, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT:    ret <vscale x 2 x i1> [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.is.fpclass.nxv2f64(<vscale x 2 x double> [[ARG]], i32 516)
+; CHECK-NEXT:    ret <vscale x 2 x i1> [[TMP1]]
 ;
   %abs = tail call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> %arg)
   %ret = fcmp oeq <vscale x 2 x double> %abs, splat (double 0x7FF0000000000000)
@@ -88,9 +81,8 @@ define <vscale x 2 x i1> @test_vec_is_inf(<vscale x 2 x double> %arg) {
 define <vscale x 2 x i1> @test_vec_is_not_inf(<vscale x 2 x double> %arg) {
 ; CHECK-LABEL: define <vscale x 2 x i1> @test_vec_is_not_inf(
 ; CHECK-SAME: <vscale x 2 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp une <vscale x 2 x double> [[ABS]], shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 0x7FF0000000000000, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT:    ret <vscale x 2 x i1> [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.is.fpclass.nxv2f64(<vscale x 2 x double> [[ARG]], i32 507)
+; CHECK-NEXT:    ret <vscale x 2 x i1> [[TMP1]]
 ;
   %abs = tail call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> %arg)
   %ret = fcmp une <vscale x 2 x double> %abs, splat (double 0x7FF0000000000000)
@@ -100,9 +92,8 @@ define <vscale x 2 x i1> @test_vec_is_not_inf(<vscale x 2 x double> %arg) {
 define i1 @test_fp128_is_inf_or_nan(fp128 %arg) {
 ; CHECK-LABEL: define i1 @test_fp128_is_inf_or_nan(
 ; CHECK-SAME: fp128 [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call fp128 @llvm.fabs.f128(fp128 [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp ueq fp128 [[ABS]], 0xL00000000000000007FFF000000000000
-; CHECK-NEXT:    ret i1 [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f128(fp128 [[ARG]], i32 519)
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
   %ret = fcmp ueq fp128 %abs, 0xL00000000000000007FFF000000000000
@@ -112,9 +103,8 @@ define i1 @test_fp128_is_inf_or_nan(fp128 %arg) {
 define i1 @test_fp128_is_not_inf_or_nan(fp128 %arg) {
 ; CHECK-LABEL: define i1 @test_fp128_is_not_inf_or_nan(
 ; CHECK-SAME: fp128 [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call fp128 @llvm.fabs.f128(fp128 [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp one fp128 [[ABS]], 0xL00000000000000007FFF000000000000
-; CHECK-NEXT:    ret i1 [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f128(fp128 [[ARG]], i32 504)
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
   %ret = fcmp one fp128 %abs, 0xL00000000000000007FFF000000000000
@@ -124,9 +114,8 @@ define i1 @test_fp128_is_not_inf_or_nan(fp128 %arg) {
 define i1 @test_fp128_is_inf(fp128 %arg) {
 ; CHECK-LABEL: define i1 @test_fp128_is_inf(
 ; CHECK-SAME: fp128 [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call fp128 @llvm.fabs.f128(fp128 [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp oeq fp128 [[ABS]], 0xL00000000000000007FFF000000000000
-; CHECK-NEXT:    ret i1 [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f128(fp128 [[ARG]], i32 516)
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
   %ret = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000
@@ -136,9 +125,8 @@ define i1 @test_fp128_is_inf(fp128 %arg) {
 define i1 @test_fp128_is_not_inf(fp128 %arg) {
 ; CHECK-LABEL: define i1 @test_fp128_is_not_inf(
 ; CHECK-SAME: fp128 [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call fp128 @llvm.fabs.f128(fp128 [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp une fp128 [[ABS]], 0xL00000000000000007FFF000000000000
-; CHECK-NEXT:    ret i1 [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f128(fp128 [[ARG]], i32 507)
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
   %ret = fcmp une fp128 %abs, 0xL00000000000000007FFF000000000000
diff --git a/llvm/test/Transforms/CodeGenPrepare/RISCV/fpclass-test.ll b/llvm/test/Transforms/CodeGenPrepare/RISCV/fpclass-test.ll
index 95047da2637868..7c00218bdcce3d 100644
--- a/llvm/test/Transforms/CodeGenPrepare/RISCV/fpclass-test.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/RISCV/fpclass-test.ll
@@ -4,9 +4,8 @@
 define i1 @test_is_inf_or_nan(double %arg) {
 ; CHECK-LABEL: define i1 @test_is_inf_or_nan(
 ; CHECK-SAME: double [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call double @llvm.fabs.f64(double [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp ueq double [[ABS]], 0x7FF0000000000000
-; CHECK-NEXT:    ret i1 [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[ARG]], i32 519)
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %abs = tail call double @llvm.fabs.f64(double %arg)
   %ret = fcmp ueq double %abs, 0x7FF0000000000000
@@ -16,9 +15,8 @@ define i1 @test_is_inf_or_nan(double %arg) {
 define i1 @test_is_not_inf_or_nan(double %arg) {
 ; CHECK-LABEL: define i1 @test_is_not_inf_or_nan(
 ; CHECK-SAME: double [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call double @llvm.fabs.f64(double [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp one double [[ABS]], 0x7FF0000000000000
-; CHECK-NEXT:    ret i1 [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[ARG]], i32 504)
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %abs = tail call double @llvm.fabs.f64(double %arg)
   %ret = fcmp one double %abs, 0x7FF0000000000000
@@ -28,9 +26,8 @@ define i1 @test_is_not_inf_or_nan(double %arg) {
 define i1 @test_is_inf(double %arg) {
 ; CHECK-LABEL: define i1 @test_is_inf(
 ; CHECK-SAME: double [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call double @llvm.fabs.f64(double [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp oeq double [[ABS]], 0x7FF0000000000000
-; CHECK-NEXT:    ret i1 [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[ARG]], i32 516)
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %abs = tail call double @llvm.fabs.f64(double %arg)
   %ret = fcmp oeq double %abs, 0x7FF0000000000000
@@ -40,9 +37,8 @@ define i1 @test_is_inf(double %arg) {
 define i1 @test_is_not_inf(double %arg) {
 ; CHECK-LABEL: define i1 @test_is_not_inf(
 ; CHECK-SAME: double [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call double @llvm.fabs.f64(double [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp une double [[ABS]], 0x7FF0000000000000
-; CHECK-NEXT:    ret i1 [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[ARG]], i32 507)
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %abs = tail call double @llvm.fabs.f64(double %arg)
   %ret = fcmp une double %abs, 0x7FF0000000000000
@@ -52,9 +48,8 @@ define i1 @test_is_not_inf(double %arg) {
 define <vscale x 4 x i1> @test_vec_is_inf_or_nan(<vscale x 4 x double> %arg) {
 ; CHECK-LABEL: define <vscale x 4 x i1> @test_vec_is_inf_or_nan(
 ; CHECK-SAME: <vscale x 4 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp ueq <vscale x 4 x double> [[ABS]], shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 0x7FF0000000000000, i64 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT:    ret <vscale x 4 x i1> [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.is.fpclass.nxv4f64(<vscale x 4 x double> [[ARG]], i32 519)
+; CHECK-NEXT:    ret <vscale x 4 x i1> [[TMP1]]
 ;
   %abs = tail call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> %arg)
   %ret = fcmp ueq <vscale x 4 x double> %abs, splat (double 0x7FF0000000000000)
@@ -64,9 +59,8 @@ define <vscale x 4 x i1> @test_vec_is_inf_or_nan(<vscale x 4 x double> %arg) {
 define <vscale x 4 x i1> @test_vec_is_not_inf_or_nan(<vscale x 4 x double> %arg) {
 ; CHECK-LABEL: define <vscale x 4 x i1> @test_vec_is_not_inf_or_nan(
 ; CHECK-SAME: <vscale x 4 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp one <vscale x 4 x double> [[ABS]], shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 0x7FF0000000000000, i64 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT:    ret <vscale x 4 x i1> [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.is.fpclass.nxv4f64(<vscale x 4 x double> [[ARG]], i32 504)
+; CHECK-NEXT:    ret <vscale x 4 x i1> [[TMP1]]
 ;
   %abs = tail call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> %arg)
   %ret = fcmp one <vscale x 4 x double> %abs, splat (double 0x7FF0000000000000)
@@ -76,9 +70,8 @@ define <vscale x 4 x i1> @test_vec_is_not_inf_or_nan(<vscale x 4 x double> %arg)
 define <vscale x 4 x i1> @test_vec_is_inf(<vscale x 4 x double> %arg) {
 ; CHECK-LABEL: define <vscale x 4 x i1> @test_vec_is_inf(
 ; CHECK-SAME: <vscale x 4 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp oeq <vscale x 4 x double> [[ABS]], shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 0x7FF0000000000000, i64 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT:    ret <vscale x 4 x i1> [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.is.fpclass.nxv4f64(<vscale x 4 x double> [[ARG]], i32 516)
+; CHECK-NEXT:    ret <vscale x 4 x i1> [[TMP1]]
 ;
   %abs = tail call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> %arg)
   %ret = fcmp oeq <vscale x 4 x double> %abs, splat (double 0x7FF0000000000000)
@@ -88,9 +81,8 @@ define <vscale x 4 x i1> @test_vec_is_inf(<vscale x 4 x double> %arg) {
 define <vscale x 4 x i1> @test_vec_is_not_inf(<vscale x 4 x double> %arg) {
 ; CHECK-LABEL: define <vscale x 4 x i1> @test_vec_is_not_inf(
 ; CHECK-SAME: <vscale x 4 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp une <vscale x 4 x double> [[ABS]], shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 0x7FF0000000000000, i64 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT:    ret <vscale x 4 x i1> [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.is.fpclass.nxv4f64(<vscale x 4 x double> [[ARG]], i32 507)
+; CHECK-NEXT:    ret <vscale x 4 x i1> [[TMP1]]
 ;
   %abs = tail call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> %arg)
   %ret = fcmp une <vscale x 4 x double> %abs, splat (double 0x7FF0000000000000)
@@ -100,9 +92,8 @@ define <vscale x 4 x i1> @test_vec_is_not_inf(<vscale x 4 x double> %arg) {
 define i1 @test_fp128_is_inf_or_nan(fp128 %arg) {
 ; CHECK-LABEL: define i1 @test_fp128_is_inf_or_nan(
 ; CHECK-SAME: fp128 [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call fp128 @llvm.fabs.f128(fp128 [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp ueq fp128 [[ABS]], 0xL00000000000000007FFF000000000000
-; CHECK-NEXT:    ret i1 [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f128(fp128 [[ARG]], i32 519)
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
   %ret = fcmp ueq fp128 %abs, 0xL00000000000000007FFF000000000000
@@ -112,9 +103,8 @@ define i1 @test_fp128_is_inf_or_nan(fp128 %arg) {
 define i1 @test_fp128_is_not_inf_or_nan(fp128 %arg) {
 ; CHECK-LABEL: define i1 @test_fp128_is_not_inf_or_nan(
 ; CHECK-SAME: fp128 [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call fp128 @llvm.fabs.f128(fp128 [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp one fp128 [[ABS]], 0xL00000000000000007FFF000000000000
-; CHECK-NEXT:    ret i1 [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f128(fp128 [[ARG]], i32 504)
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
   %ret = fcmp one fp128 %abs, 0xL00000000000000007FFF000000000000
@@ -124,9 +114,8 @@ define i1 @test_fp128_is_not_inf_or_nan(fp128 %arg) {
 define i1 @test_fp128_is_inf(fp128 %arg) {
 ; CHECK-LABEL: define i1 @test_fp128_is_inf(
 ; CHECK-SAME: fp128 [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call fp128 @llvm.fabs.f128(fp128 [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp oeq fp128 [[ABS]], 0xL00000000000000007FFF000000000000
-; CHECK-NEXT:    ret i1 [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f128(fp128 [[ARG]], i32 516)
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
   %ret = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000
@@ -136,9 +125,8 @@ define i1 @test_fp128_is_inf(fp128 %arg) {
 define i1 @test_fp128_is_not_inf(fp128 %arg) {
 ; CHECK-LABEL: define i1 @test_fp128_is_not_inf(
 ; CHECK-SAME: fp128 [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call fp128 @llvm.fabs.f128(fp128 [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp une fp128 [[ABS]], 0xL00000000000000007FFF000000000000
-; CHECK-NEXT:    ret i1 [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f128(fp128 [[ARG]], i32 507)
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
   %ret = fcmp une fp128 %abs, 0xL00000000000000007FFF000000000000
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/fpclass-test.ll b/llvm/test/Transforms/CodeGenPrepare/X86/fpclass-test.ll
index 8b924f1640e3a4..525caeb3e79a10 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/fpclass-test.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/fpclass-test.ll
@@ -4,9 +4,8 @@
 define i1 @test_is_inf_or_nan(double %arg) {
 ; CHECK-LABEL: define i1 @test_is_inf_or_nan(
 ; CHECK-SAME: double [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call double @llvm.fabs.f64(double [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp ueq double [[ABS]], 0x7FF0000000000000
-; CHECK-NEXT:    ret i1 [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[ARG]], i32 519)
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %abs = tail call double @llvm.fabs.f64(double %arg)
   %ret = fcmp ueq double %abs, 0x7FF0000000000000
@@ -16,9 +15,8 @@ define i1 @test_is_inf_or_nan(double %arg) {
 define i1 @test_is_not_inf_or_nan(double %arg) {
 ; CHECK-LABEL: define i1 @test_is_not_inf_or_nan(
 ; CHECK-SAME: double [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call double @llvm.fabs.f64(double [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp one double [[ABS]], 0x7FF0000000000000
-; CHECK-NEXT:    ret i1 [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[ARG]], i32 504)
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %abs = tail call double @llvm.fabs.f64(double %arg)
   %ret = fcmp one double %abs, 0x7FF0000000000000
@@ -28,9 +26,8 @@ define i1 @test_is_not_inf_or_nan(double %arg) {
 define i1 @test_is_inf(double %arg) {
 ; CHECK-LABEL: define i1 @test_is_inf(
 ; CHECK-SAME: double [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call double @llvm.fabs.f64(double [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp oeq double [[ABS]], 0x7FF0000000000000
-; CHECK-NEXT:    ret i1 [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[ARG]], i32 516)
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %abs = tail call double @llvm.fabs.f64(double %arg)
   %ret = fcmp oeq double %abs, 0x7FF0000000000000
@@ -40,9 +37,8 @@ define i1 @test_is_inf(double %arg) {
 define i1 @test_is_not_inf(double %arg) {
 ; CHECK-LABEL: define i1 @test_is_not_inf(
 ; CHECK-SAME: double [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call double @llvm.fabs.f64(double [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp une double [[ABS]], 0x7FF0000000000000
-; CHECK-NEXT:    ret i1 [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f64(double [[ARG]], i32 507)
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %abs = tail call double @llvm.fabs.f64(double %arg)
   %ret = fcmp une double %abs, 0x7FF0000000000000
@@ -52,9 +48,8 @@ define i1 @test_is_not_inf(double %arg) {
 define <4 x i1> @test_vec_is_inf_or_nan(<4 x double> %arg) {
 ; CHECK-LABEL: define <4 x i1> @test_vec_is_inf_or_nan(
 ; CHECK-SAME: <4 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp ueq <4 x double> [[ABS]], <double 0x7FF0000000000000, double 0x7FF0000000000000, double 0x7FF0000000000000, double 0x7FF0000000000000>
-; CHECK-NEXT:    ret <4 x i1> [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.is.fpclass.v4f64(<4 x double> [[ARG]], i32 519)
+; CHECK-NEXT:    ret <4 x i1> [[TMP1]]
 ;
   %abs = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> %arg)
   %ret = fcmp ueq <4 x double> %abs, splat (double 0x7FF0000000000000)
@@ -64,9 +59,8 @@ define <4 x i1> @test_vec_is_inf_or_nan(<4 x double> %arg) {
 define <4 x i1> @test_vec_is_not_inf_or_nan(<4 x double> %arg) {
 ; CHECK-LABEL: define <4 x i1> @test_vec_is_not_inf_or_nan(
 ; CHECK-SAME: <4 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp one <4 x double> [[ABS]], <double 0x7FF0000000000000, double 0x7FF0000000000000, double 0x7FF0000000000000, double 0x7FF0000000000000>
-; CHECK-NEXT:    ret <4 x i1> [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.is.fpclass.v4f64(<4 x double> [[ARG]], i32 504)
+; CHECK-NEXT:    ret <4 x i1> [[TMP1]]
 ;
   %abs = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> %arg)
   %ret = fcmp one <4 x double> %abs, splat (double 0x7FF0000000000000)
@@ -76,9 +70,8 @@ define <4 x i1> @test_vec_is_not_inf_or_nan(<4 x double> %arg) {
 define <4 x i1> @test_vec_is_inf(<4 x double> %arg) {
 ; CHECK-LABEL: define <4 x i1> @test_vec_is_inf(
 ; CHECK-SAME: <4 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp oeq <4 x double> [[ABS]], <double 0x7FF0000000000000, double 0x7FF0000000000000, double 0x7FF0000000000000, double 0x7FF0000000000000>
-; CHECK-NEXT:    ret <4 x i1> [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.is.fpclass.v4f64(<4 x double> [[ARG]], i32 516)
+; CHECK-NEXT:    ret <4 x i1> [[TMP1]]
 ;
   %abs = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> %arg)
   %ret = fcmp oeq <4 x double> %abs, splat (double 0x7FF0000000000000)
@@ -88,9 +81,8 @@ define <4 x i1> @test_vec_is_inf(<4 x double> %arg) {
 define <4 x i1> @test_vec_is_not_inf(<4 x double> %arg) {
 ; CHECK-LABEL: define <4 x i1> @test_vec_is_not_inf(
 ; CHECK-SAME: <4 x double> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp une <4 x double> [[ABS]], <double 0x7FF0000000000000, double 0x7FF0000000000000, double 0x7FF0000000000000, double 0x7FF0000000000000>
-; CHECK-NEXT:    ret <4 x i1> [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.is.fpclass.v4f64(<4 x double> [[ARG]], i32 507)
+; CHECK-NEXT:    ret <4 x i1> [[TMP1]]
 ;
   %abs = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> %arg)
   %ret = fcmp une <4 x double> %abs, splat (double 0x7FF0000000000000)
@@ -100,9 +92,8 @@ define <4 x i1> @test_vec_is_not_inf(<4 x double> %arg) {
 define i1 @test_fp128_is_inf_or_nan(fp128 %arg) {
 ; CHECK-LABEL: define i1 @test_fp128_is_inf_or_nan(
 ; CHECK-SAME: fp128 [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call fp128 @llvm.fabs.f128(fp128 [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp ueq fp128 [[ABS]], 0xL00000000000000007FFF000000000000
-; CHECK-NEXT:    ret i1 [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f128(fp128 [[ARG]], i32 519)
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
   %ret = fcmp ueq fp128 %abs, 0xL00000000000000007FFF000000000000
@@ -112,9 +103,8 @@ define i1 @test_fp128_is_inf_or_nan(fp128 %arg) {
 define i1 @test_fp128_is_not_inf_or_nan(fp128 %arg) {
 ; CHECK-LABEL: define i1 @test_fp128_is_not_inf_or_nan(
 ; CHECK-SAME: fp128 [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call fp128 @llvm.fabs.f128(fp128 [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp one fp128 [[ABS]], 0xL00000000000000007FFF000000000000
-; CHECK-NEXT:    ret i1 [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f128(fp128 [[ARG]], i32 504)
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
   %ret = fcmp one fp128 %abs, 0xL00000000000000007FFF000000000000
@@ -124,9 +114,8 @@ define i1 @test_fp128_is_not_inf_or_nan(fp128 %arg) {
 define i1 @test_fp128_is_inf(fp128 %arg) {
 ; CHECK-LABEL: define i1 @test_fp128_is_inf(
 ; CHECK-SAME: fp128 [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call fp128 @llvm.fabs.f128(fp128 [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp oeq fp128 [[ABS]], 0xL00000000000000007FFF000000000000
-; CHECK-NEXT:    ret i1 [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f128(fp128 [[ARG]], i32 516)
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
   %ret = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000
@@ -136,9 +125,8 @@ define i1 @test_fp128_is_inf(fp128 %arg) {
 define i1 @test_fp128_is_not_inf(fp128 %arg) {
 ; CHECK-LABEL: define i1 @test_fp128_is_not_inf(
 ; CHECK-SAME: fp128 [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call fp128 @llvm.fabs.f128(fp128 [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp une fp128 [[ABS]], 0xL00000000000000007FFF000000000000
-; CHECK-NEXT:    ret i1 [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f128(fp128 [[ARG]], i32 507)
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
   %ret = fcmp une fp128 %abs, 0xL00000000000000007FFF000000000000
@@ -148,9 +136,8 @@ define i1 @test_fp128_is_not_inf(fp128 %arg) {
 define i1 @test_x86_fp80_is_inf_or_nan(x86_fp80 %arg) {
 ; CHECK-LABEL: define i1 @test_x86_fp80_is_inf_or_nan(
 ; CHECK-SAME: x86_fp80 [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp ueq x86_fp80 [[ABS]], 0xK7FFF8000000000000000
-; CHECK-NEXT:    ret i1 [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[ARG]], i32 519)
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %abs = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 %arg)
   %ret = fcmp ueq x86_fp80 %abs, 0xK7FFF8000000000000000
@@ -160,9 +147,8 @@ define i1 @test_x86_fp80_is_inf_or_nan(x86_fp80 %arg) {
 define i1 @test_x86_fp80_is_not_inf_or_nan(x86_fp80 %arg) {
 ; CHECK-LABEL: define i1 @test_x86_fp80_is_not_inf_or_nan(
 ; CHECK-SAME: x86_fp80 [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp one x86_fp80 [[ABS]], 0xK7FFF8000000000000000
-; CHECK-NEXT:    ret i1 [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[ARG]], i32 504)
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %abs = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 %arg)
   %ret = fcmp one x86_fp80 %abs, 0xK7FFF8000000000000000
@@ -172,9 +158,8 @@ define i1 @test_x86_fp80_is_not_inf_or_nan(x86_fp80 %arg) {
 define i1 @test_x86_fp80_is_inf(x86_fp80 %arg) {
 ; CHECK-LABEL: define i1 @test_x86_fp80_is_inf(
 ; CHECK-SAME: x86_fp80 [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp oeq x86_fp80 [[ABS]], 0xK7FFF8000000000000000
-; CHECK-NEXT:    ret i1 [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[ARG]], i32 516)
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %abs = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 %arg)
   %ret = fcmp oeq x86_fp80 %abs, 0xK7FFF8000000000000000
@@ -184,9 +169,8 @@ define i1 @test_x86_fp80_is_inf(x86_fp80 %arg) {
 define i1 @test_x86_fp80_is_not_inf(x86_fp80 %arg) {
 ; CHECK-LABEL: define i1 @test_x86_fp80_is_not_inf(
 ; CHECK-SAME: x86_fp80 [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ABS:%.*]] = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 [[ARG]])
-; CHECK-NEXT:    [[RET:%.*]] = fcmp une x86_fp80 [[ABS]], 0xK7FFF8000000000000000
-; CHECK-NEXT:    ret i1 [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f80(x86_fp80 [[ARG]], i32 507)
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %abs = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 %arg)
   %ret = fcmp une x86_fp80 %abs, 0xK7FFF8000000000000000

>From f427c71348ce05052f9a437f6f5683939cd1b657 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Tue, 12 Mar 2024 18:50:31 +0800
Subject: [PATCH 3/4] fixup! [CodeGenPrepare] Reverse the canonicalization of
 isInf/isNanOrInf

---
 llvm/lib/CodeGen/CodeGenPrepare.cpp     |  28 +++--
 llvm/test/CodeGen/AMDGPU/fp-classify.ll | 136 ++++++++++--------------
 llvm/test/CodeGen/AMDGPU/fract-match.ll |  92 ++++++++--------
 3 files changed, 119 insertions(+), 137 deletions(-)

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 516555d6e7d9c7..b0032429963461 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1943,7 +1943,8 @@ static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp) {
   return false;
 }
 
-static bool foldFCmpToFPClassTest(CmpInst *Cmp) {
+static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
+                                  const DataLayout &DL) {
   FCmpInst *FCmp = dyn_cast<FCmpInst>(Cmp);
   if (!FCmp)
     return false;
@@ -1955,16 +1956,21 @@ static bool foldFCmpToFPClassTest(CmpInst *Cmp) {
   auto [ClassVal, ClassTest] =
       fcmpToClassTest(FCmp->getPredicate(), *FCmp->getParent()->getParent(),
                       FCmp->getOperand(0), FCmp->getOperand(1));
-  if (ClassVal && (ShouldReverseTransform(ClassTest) ||
-                   ShouldReverseTransform(~ClassTest))) {
-    IRBuilder<> Builder(Cmp);
-    Value *IsFPClass = Builder.createIsFPClass(ClassVal, ClassTest);
-    Cmp->replaceAllUsesWith(IsFPClass);
-    RecursivelyDeleteTriviallyDeadInstructions(Cmp);
-    return true;
-  }
+  if (!ClassVal)
+    return false;
 
-  return false;
+  if (!ShouldReverseTransform(ClassTest) && !ShouldReverseTransform(~ClassTest))
+    return false;
+
+  // Don't fold if the target offers free fabs.
+  if (TLI.isFAbsFree(TLI.getValueType(DL, ClassVal->getType())))
+    return false;
+
+  IRBuilder<> Builder(Cmp);
+  Value *IsFPClass = Builder.createIsFPClass(ClassVal, ClassTest);
+  Cmp->replaceAllUsesWith(IsFPClass);
+  RecursivelyDeleteTriviallyDeadInstructions(Cmp);
+  return true;
 }
 
 bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
@@ -1983,7 +1989,7 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
   if (swapICmpOperandsToExposeCSEOpportunities(Cmp))
     return true;
 
-  if (foldFCmpToFPClassTest(Cmp))
+  if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
     return true;
 
   return false;
diff --git a/llvm/test/CodeGen/AMDGPU/fp-classify.ll b/llvm/test/CodeGen/AMDGPU/fp-classify.ll
index ed9ce4d62383b1..18d2e52e8f9002 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-classify.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp-classify.ll
@@ -61,10 +61,10 @@ define amdgpu_kernel void @test_not_isinf_pattern_0(ptr addrspace(1) nocapture %
 ; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
 ; SI-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-NEXT:    s_mov_b32 s6, -1
-; SI-NEXT:    v_mov_b32_e32 v0, 0x207
+; SI-NEXT:    v_mov_b32_e32 v0, 0x7f800000
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    v_cmp_class_f32_e32 vcc, s0, v0
-; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-NEXT:    v_cmp_nlg_f32_e64 s[0:1], |s0|, v0
+; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
 ; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; SI-NEXT:    s_endpgm
 ;
@@ -72,11 +72,11 @@ define amdgpu_kernel void @test_not_isinf_pattern_0(ptr addrspace(1) nocapture %
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x207
+; VI-NEXT:    v_mov_b32_e32 v0, 0x7f800000
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_class_f32_e32 vcc, s2, v0
+; VI-NEXT:    v_cmp_nlg_f32_e64 s[2:3], |s2|, v0
 ; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[2:3]
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-NEXT:    flat_store_dword v[0:1], v2
 ; VI-NEXT:    s_endpgm
@@ -88,7 +88,7 @@ define amdgpu_kernel void @test_not_isinf_pattern_0(ptr addrspace(1) nocapture %
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_class_f32_e64 s2, s2, 0x207
+; GFX11-NEXT:    v_cmp_nlg_f32_e64 s2, 0x7f800000, |s2|
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
 ; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
@@ -143,29 +143,25 @@ define amdgpu_kernel void @test_isfinite_pattern_0(ptr addrspace(1) nocapture %o
 ; SI-LABEL: test_isfinite_pattern_0:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
+; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
 ; SI-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-NEXT:    s_mov_b32 s6, -1
-; SI-NEXT:    v_mov_b32_e32 v0, 0x1fb
+; SI-NEXT:    v_mov_b32_e32 v0, 0x1f8
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    v_cmp_o_f32_e64 s[0:1], s2, s2
-; SI-NEXT:    v_cmp_class_f32_e32 vcc, s2, v0
-; SI-NEXT:    s_and_b64 s[0:1], s[0:1], vcc
-; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; SI-NEXT:    v_cmp_class_f32_e32 vcc, s0, v0
+; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: test_isfinite_pattern_0:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x1fb
+; VI-NEXT:    v_mov_b32_e32 v0, 0x1f8
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_o_f32_e64 s[2:3], s4, s4
-; VI-NEXT:    v_cmp_class_f32_e32 vcc, s4, v0
-; VI-NEXT:    s_and_b64 s[2:3], s[2:3], vcc
+; VI-NEXT:    v_cmp_class_f32_e32 vcc, s2, v0
 ; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[2:3]
+; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-NEXT:    flat_store_dword v[0:1], v2
 ; VI-NEXT:    s_endpgm
@@ -177,10 +173,8 @@ define amdgpu_kernel void @test_isfinite_pattern_0(ptr addrspace(1) nocapture %o
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_o_f32_e64 s3, s2, s2
-; GFX11-NEXT:    v_cmp_class_f32_e64 s2, s2, 0x1fb
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT:    s_and_b32 s2, s3, s2
+; GFX11-NEXT:    v_cmp_class_f32_e64 s2, s2, 0x1f8
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
 ; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX11-NEXT:    s_nop 0
@@ -355,13 +349,13 @@ define amdgpu_kernel void @test_isfinite_not_pattern_2(ptr addrspace(1) nocaptur
 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
 ; SI-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-NEXT:    s_mov_b32 s6, -1
-; SI-NEXT:    v_mov_b32_e32 v0, 0x1fb
+; SI-NEXT:    v_mov_b32_e32 v0, 0x7f800000
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_mov_b32 s4, s0
 ; SI-NEXT:    s_mov_b32 s5, s1
 ; SI-NEXT:    v_cmp_o_f32_e64 s[0:1], s2, s2
-; SI-NEXT:    v_cmp_class_f32_e32 vcc, s3, v0
-; SI-NEXT:    s_and_b64 s[0:1], s[0:1], vcc
+; SI-NEXT:    v_cmp_neq_f32_e64 s[2:3], |s3|, v0
+; SI-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
 ; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
 ; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; SI-NEXT:    s_endpgm
@@ -369,11 +363,11 @@ define amdgpu_kernel void @test_isfinite_not_pattern_2(ptr addrspace(1) nocaptur
 ; VI-LABEL: test_isfinite_not_pattern_2:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x1fb
+; VI-NEXT:    v_mov_b32_e32 v0, 0x7f800000
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-NEXT:    v_cmp_o_f32_e64 s[4:5], s2, s2
-; VI-NEXT:    v_cmp_class_f32_e32 vcc, s3, v0
-; VI-NEXT:    s_and_b64 s[2:3], s[4:5], vcc
+; VI-NEXT:    v_cmp_neq_f32_e64 s[2:3], |s3|, v0
+; VI-NEXT:    s_and_b64 s[2:3], s[4:5], s[2:3]
 ; VI-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[2:3]
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
@@ -386,7 +380,7 @@ define amdgpu_kernel void @test_isfinite_not_pattern_2(ptr addrspace(1) nocaptur
 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    v_cmp_o_f32_e64 s2, s2, s2
-; GFX11-NEXT:    v_cmp_class_f32_e64 s3, s3, 0x1fb
+; GFX11-NEXT:    v_cmp_neq_f32_e64 s3, 0x7f800000, |s3|
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX11-NEXT:    s_and_b32 s2, s2, s3
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
@@ -411,11 +405,11 @@ define amdgpu_kernel void @test_isfinite_not_pattern_3(ptr addrspace(1) nocaptur
 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
 ; SI-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-NEXT:    s_mov_b32 s6, -1
-; SI-NEXT:    v_mov_b32_e32 v0, 0x1fb
+; SI-NEXT:    v_mov_b32_e32 v0, 0x7f800000
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    v_cmp_u_f32_e64 s[0:1], s2, s2
-; SI-NEXT:    v_cmp_class_f32_e32 vcc, s2, v0
-; SI-NEXT:    s_and_b64 s[0:1], s[0:1], vcc
+; SI-NEXT:    v_cmp_neq_f32_e64 s[2:3], |s2|, v0
+; SI-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
 ; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
 ; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; SI-NEXT:    s_endpgm
@@ -424,11 +418,11 @@ define amdgpu_kernel void @test_isfinite_not_pattern_3(ptr addrspace(1) nocaptur
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dword s4, s[0:1], 0x2c
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x1fb
+; VI-NEXT:    v_mov_b32_e32 v0, 0x7f800000
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-NEXT:    v_cmp_u_f32_e64 s[2:3], s4, s4
-; VI-NEXT:    v_cmp_class_f32_e32 vcc, s4, v0
-; VI-NEXT:    s_and_b64 s[2:3], s[2:3], vcc
+; VI-NEXT:    v_cmp_neq_f32_e64 s[4:5], |s4|, v0
+; VI-NEXT:    s_and_b64 s[2:3], s[2:3], s[4:5]
 ; VI-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[2:3]
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
@@ -443,7 +437,7 @@ define amdgpu_kernel void @test_isfinite_not_pattern_3(ptr addrspace(1) nocaptur
 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    v_cmp_u_f32_e64 s3, s2, s2
-; GFX11-NEXT:    v_cmp_class_f32_e64 s2, s2, 0x1fb
+; GFX11-NEXT:    v_cmp_neq_f32_e64 s2, 0x7f800000, |s2|
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX11-NEXT:    s_and_b32 s2, s3, s2
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
@@ -464,29 +458,25 @@ define amdgpu_kernel void @test_isfinite_pattern_4(ptr addrspace(1) nocapture %o
 ; SI-LABEL: test_isfinite_pattern_4:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
+; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
 ; SI-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-NEXT:    s_mov_b32 s6, -1
 ; SI-NEXT:    v_mov_b32_e32 v0, 0x1f8
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    v_cmp_o_f32_e64 s[0:1], s2, s2
-; SI-NEXT:    v_cmp_class_f32_e32 vcc, s2, v0
-; SI-NEXT:    s_and_b64 s[0:1], s[0:1], vcc
-; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; SI-NEXT:    v_cmp_class_f32_e32 vcc, s0, v0
+; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: test_isfinite_pattern_4:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; VI-NEXT:    v_mov_b32_e32 v0, 0x1f8
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_o_f32_e64 s[2:3], s4, s4
-; VI-NEXT:    v_cmp_class_f32_e32 vcc, s4, v0
-; VI-NEXT:    s_and_b64 s[2:3], s[2:3], vcc
+; VI-NEXT:    v_cmp_class_f32_e32 vcc, s2, v0
 ; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[2:3]
+; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-NEXT:    flat_store_dword v[0:1], v2
 ; VI-NEXT:    s_endpgm
@@ -498,10 +488,8 @@ define amdgpu_kernel void @test_isfinite_pattern_4(ptr addrspace(1) nocapture %o
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_o_f32_e64 s3, s2, s2
 ; GFX11-NEXT:    v_cmp_class_f32_e64 s2, s2, 0x1f8
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT:    s_and_b32 s2, s3, s2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
 ; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX11-NEXT:    s_nop 0
@@ -520,29 +508,25 @@ define amdgpu_kernel void @test_isfinite_pattern_4_commute_and(ptr addrspace(1)
 ; SI-LABEL: test_isfinite_pattern_4_commute_and:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
+; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
 ; SI-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-NEXT:    s_mov_b32 s6, -1
 ; SI-NEXT:    v_mov_b32_e32 v0, 0x1f8
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    v_cmp_o_f32_e64 s[0:1], s2, s2
-; SI-NEXT:    v_cmp_class_f32_e32 vcc, s2, v0
-; SI-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
-; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; SI-NEXT:    v_cmp_class_f32_e32 vcc, s0, v0
+; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
 ; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: test_isfinite_pattern_4_commute_and:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; VI-NEXT:    v_mov_b32_e32 v0, 0x1f8
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_o_f32_e64 s[2:3], s4, s4
-; VI-NEXT:    v_cmp_class_f32_e32 vcc, s4, v0
-; VI-NEXT:    s_and_b64 s[2:3], vcc, s[2:3]
+; VI-NEXT:    v_cmp_class_f32_e32 vcc, s2, v0
 ; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[2:3]
+; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-NEXT:    flat_store_dword v[0:1], v2
 ; VI-NEXT:    s_endpgm
@@ -554,10 +538,8 @@ define amdgpu_kernel void @test_isfinite_pattern_4_commute_and(ptr addrspace(1)
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_o_f32_e64 s3, s2, s2
 ; GFX11-NEXT:    v_cmp_class_f32_e64 s2, s2, 0x1f8
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT:    s_and_b32 s2, s2, s3
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
 ; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX11-NEXT:    s_nop 0
@@ -702,15 +684,13 @@ define amdgpu_kernel void @test_isfinite_pattern_0_f16(ptr addrspace(1) nocaptur
 ;
 ; VI-LABEL: test_isfinite_pattern_0_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT:    v_mov_b32_e32 v0, 0x1fb
+; VI-NEXT:    v_mov_b32_e32 v0, 0x1f8
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_o_f16_e64 s[2:3], s4, s4
-; VI-NEXT:    v_cmp_class_f16_e32 vcc, s4, v0
-; VI-NEXT:    s_and_b64 s[2:3], s[2:3], vcc
+; VI-NEXT:    v_cmp_class_f16_e32 vcc, s2, v0
 ; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[2:3]
+; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-NEXT:    flat_store_dword v[0:1], v2
 ; VI-NEXT:    s_endpgm
@@ -722,10 +702,8 @@ define amdgpu_kernel void @test_isfinite_pattern_0_f16(ptr addrspace(1) nocaptur
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_o_f16_e64 s3, s2, s2
-; GFX11-NEXT:    v_cmp_class_f16_e64 s2, s2, 0x1fb
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT:    s_and_b32 s2, s3, s2
+; GFX11-NEXT:    v_cmp_class_f16_e64 s2, s2, 0x1f8
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
 ; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX11-NEXT:    s_nop 0
@@ -760,15 +738,13 @@ define amdgpu_kernel void @test_isfinite_pattern_4_f16(ptr addrspace(1) nocaptur
 ;
 ; VI-LABEL: test_isfinite_pattern_4_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; VI-NEXT:    v_mov_b32_e32 v0, 0x1f8
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_cmp_o_f16_e64 s[2:3], s4, s4
-; VI-NEXT:    v_cmp_class_f16_e32 vcc, s4, v0
-; VI-NEXT:    s_and_b64 s[2:3], s[2:3], vcc
+; VI-NEXT:    v_cmp_class_f16_e32 vcc, s2, v0
 ; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[2:3]
+; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-NEXT:    flat_store_dword v[0:1], v2
 ; VI-NEXT:    s_endpgm
@@ -780,10 +756,8 @@ define amdgpu_kernel void @test_isfinite_pattern_4_f16(ptr addrspace(1) nocaptur
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_o_f16_e64 s3, s2, s2
 ; GFX11-NEXT:    v_cmp_class_f16_e64 s2, s2, 0x1f8
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT:    s_and_b32 s2, s3, s2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
 ; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX11-NEXT:    s_nop 0
diff --git a/llvm/test/CodeGen/AMDGPU/fract-match.ll b/llvm/test/CodeGen/AMDGPU/fract-match.ll
index d65dff7bc44627..e361aa4db2aa94 100644
--- a/llvm/test/CodeGen/AMDGPU/fract-match.ll
+++ b/llvm/test/CodeGen/AMDGPU/fract-match.ll
@@ -53,14 +53,14 @@ define float @safe_math_fract_f32(float %x, ptr addrspace(1) nocapture writeonly
 ; GFX6-NEXT:    v_sub_f32_e32 v4, v0, v3
 ; GFX6-NEXT:    v_min_f32_e32 v4, 0x3f7fffff, v4
 ; GFX6-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
-; GFX6-NEXT:    v_mov_b32_e32 v5, 0x204
+; GFX6-NEXT:    s_mov_b32 s8, 0x7f800000
 ; GFX6-NEXT:    s_mov_b32 s6, 0
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
-; GFX6-NEXT:    v_cmp_class_f32_e32 vcc, v0, v5
+; GFX6-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s8
 ; GFX6-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX6-NEXT:    s_mov_b32 s4, s6
 ; GFX6-NEXT:    s_mov_b32 s5, s6
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
 ; GFX6-NEXT:    buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
@@ -68,15 +68,15 @@ define float @safe_math_fract_f32(float %x, ptr addrspace(1) nocapture writeonly
 ; GFX7-LABEL: safe_math_fract_f32:
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_mov_b32_e32 v5, 0x204
+; GFX7-NEXT:    s_mov_b32 s8, 0x7f800000
 ; GFX7-NEXT:    s_mov_b32 s6, 0
 ; GFX7-NEXT:    v_fract_f32_e32 v4, v0
-; GFX7-NEXT:    v_cmp_class_f32_e32 vcc, v0, v5
+; GFX7-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s8
 ; GFX7-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX7-NEXT:    s_mov_b32 s4, s6
 ; GFX7-NEXT:    s_mov_b32 s5, s6
 ; GFX7-NEXT:    v_floor_f32_e32 v3, v0
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
 ; GFX7-NEXT:    buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
@@ -84,11 +84,11 @@ define float @safe_math_fract_f32(float %x, ptr addrspace(1) nocapture writeonly
 ; GFX8-LABEL: safe_math_fract_f32:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v5, 0x204
+; GFX8-NEXT:    s_mov_b32 s4, 0x7f800000
 ; GFX8-NEXT:    v_fract_f32_e32 v4, v0
-; GFX8-NEXT:    v_cmp_class_f32_e32 vcc, v0, v5
+; GFX8-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s4
 ; GFX8-NEXT:    v_floor_f32_e32 v3, v0
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
 ; GFX8-NEXT:    global_store_dword v[1:2], v3, off
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
@@ -97,10 +97,10 @@ define float @safe_math_fract_f32(float %x, ptr addrspace(1) nocapture writeonly
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_fract_f32_e32 v3, v0
-; GFX11-NEXT:    v_cmp_class_f32_e64 s0, v0, 0x204
+; GFX11-NEXT:    v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
 ; GFX11-NEXT:    v_floor_f32_e32 v4, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v3, 0, s0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc_lo
 ; GFX11-NEXT:    global_store_b32 v[1:2], v4, off
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 entry:
@@ -210,14 +210,14 @@ define float @no_nan_check_math_fract_f32(float %x, ptr addrspace(1) nocapture w
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_floor_f32_e32 v3, v0
 ; GFX6-NEXT:    v_sub_f32_e32 v4, v0, v3
-; GFX6-NEXT:    v_mov_b32_e32 v5, 0x204
+; GFX6-NEXT:    s_mov_b32 s8, 0x7f800000
 ; GFX6-NEXT:    s_mov_b32 s6, 0
 ; GFX6-NEXT:    v_min_f32_e32 v4, 0x3f7fffff, v4
-; GFX6-NEXT:    v_cmp_class_f32_e32 vcc, v0, v5
+; GFX6-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s8
 ; GFX6-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX6-NEXT:    s_mov_b32 s4, s6
 ; GFX6-NEXT:    s_mov_b32 s5, s6
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
 ; GFX6-NEXT:    buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
@@ -227,14 +227,14 @@ define float @no_nan_check_math_fract_f32(float %x, ptr addrspace(1) nocapture w
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    v_floor_f32_e32 v3, v0
 ; GFX7-NEXT:    v_sub_f32_e32 v4, v0, v3
-; GFX7-NEXT:    v_mov_b32_e32 v5, 0x204
+; GFX7-NEXT:    s_mov_b32 s8, 0x7f800000
 ; GFX7-NEXT:    s_mov_b32 s6, 0
 ; GFX7-NEXT:    v_min_f32_e32 v4, 0x3f7fffff, v4
-; GFX7-NEXT:    v_cmp_class_f32_e32 vcc, v0, v5
+; GFX7-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s8
 ; GFX7-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX7-NEXT:    s_mov_b32 s4, s6
 ; GFX7-NEXT:    s_mov_b32 s5, s6
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
 ; GFX7-NEXT:    buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
@@ -244,10 +244,10 @@ define float @no_nan_check_math_fract_f32(float %x, ptr addrspace(1) nocapture w
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_floor_f32_e32 v3, v0
 ; GFX8-NEXT:    v_sub_f32_e32 v4, v0, v3
-; GFX8-NEXT:    v_mov_b32_e32 v5, 0x204
+; GFX8-NEXT:    s_mov_b32 s4, 0x7f800000
 ; GFX8-NEXT:    v_min_f32_e32 v4, 0x3f7fffff, v4
-; GFX8-NEXT:    v_cmp_class_f32_e32 vcc, v0, v5
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX8-NEXT:    v_cmp_neq_f32_e64 vcc, |v0|, s4
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
 ; GFX8-NEXT:    global_store_dword v[1:2], v3, off
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
@@ -256,12 +256,12 @@ define float @no_nan_check_math_fract_f32(float %x, ptr addrspace(1) nocapture w
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_floor_f32_e32 v3, v0
-; GFX11-NEXT:    v_cmp_class_f32_e64 s0, v0, 0x204
+; GFX11-NEXT:    v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_sub_f32_e32 v4, v0, v3
 ; GFX11-NEXT:    global_store_b32 v[1:2], v3, off
 ; GFX11-NEXT:    v_min_f32_e32 v4, 0x3f7fffff, v4
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v4, 0, s0
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc_lo
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %floor = tail call float @llvm.floor.f32(float %x)
@@ -1824,16 +1824,17 @@ define double @safe_math_fract_f64(double %x, ptr addrspace(1) nocapture writeon
 ; GFX6-NEXT:    v_add_f64 v[6:7], v[0:1], -v[4:5]
 ; GFX6-NEXT:    v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
 ; GFX6-NEXT:    v_min_f64 v[6:7], v[6:7], s[8:9]
-; GFX6-NEXT:    v_mov_b32_e32 v8, 0x204
+; GFX6-NEXT:    s_mov_b32 s8, 0
+; GFX6-NEXT:    s_mov_b32 s9, 0x7ff00000
 ; GFX6-NEXT:    v_cndmask_b32_e32 v7, v7, v1, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc
-; GFX6-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v8
+; GFX6-NEXT:    v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[8:9]
 ; GFX6-NEXT:    s_mov_b32 s6, 0
 ; GFX6-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX6-NEXT:    s_mov_b32 s4, s6
 ; GFX6-NEXT:    s_mov_b32 s5, s6
-; GFX6-NEXT:    v_cndmask_b32_e64 v0, v6, 0, vcc
-; GFX6-NEXT:    v_cndmask_b32_e64 v1, v7, 0, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v1, 0, v7, vcc
 ; GFX6-NEXT:    buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
@@ -1841,16 +1842,17 @@ define double @safe_math_fract_f64(double %x, ptr addrspace(1) nocapture writeon
 ; GFX7-LABEL: safe_math_fract_f64:
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_mov_b32_e32 v6, 0x204
+; GFX7-NEXT:    s_mov_b32 s4, 0
+; GFX7-NEXT:    s_mov_b32 s5, 0x7ff00000
 ; GFX7-NEXT:    v_fract_f64_e32 v[4:5], v[0:1]
-; GFX7-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v6
+; GFX7-NEXT:    v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[4:5]
 ; GFX7-NEXT:    v_floor_f64_e32 v[6:7], v[0:1]
 ; GFX7-NEXT:    s_mov_b32 s6, 0
 ; GFX7-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX7-NEXT:    s_mov_b32 s4, s6
 ; GFX7-NEXT:    s_mov_b32 s5, s6
-; GFX7-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX7-NEXT:    v_cndmask_b32_e64 v1, v5, 0, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX7-NEXT:    v_cndmask_b32_e32 v1, 0, v5, vcc
 ; GFX7-NEXT:    buffer_store_dwordx2 v[6:7], v[2:3], s[4:7], 0 addr64
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
@@ -1858,12 +1860,13 @@ define double @safe_math_fract_f64(double %x, ptr addrspace(1) nocapture writeon
 ; GFX8-LABEL: safe_math_fract_f64:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v6, 0x204
+; GFX8-NEXT:    s_mov_b32 s4, 0
+; GFX8-NEXT:    s_mov_b32 s5, 0x7ff00000
 ; GFX8-NEXT:    v_fract_f64_e32 v[4:5], v[0:1]
-; GFX8-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v6
+; GFX8-NEXT:    v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[4:5]
 ; GFX8-NEXT:    v_floor_f64_e32 v[6:7], v[0:1]
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
-; GFX8-NEXT:    v_cndmask_b32_e64 v1, v5, 0, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v1, 0, v5, vcc
 ; GFX8-NEXT:    global_store_dwordx2 v[2:3], v[6:7], off
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
@@ -1872,11 +1875,10 @@ define double @safe_math_fract_f64(double %x, ptr addrspace(1) nocapture writeon
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_fract_f64_e32 v[4:5], v[0:1]
-; GFX11-NEXT:    v_cmp_class_f64_e64 s0, v[0:1], 0x204
+; GFX11-NEXT:    v_cmp_neq_f64_e64 vcc_lo, 0x7ff00000, |v[0:1]|
 ; GFX11-NEXT:    v_floor_f64_e32 v[6:7], v[0:1]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v4, 0, s0
-; GFX11-NEXT:    v_cndmask_b32_e64 v1, v5, 0, s0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11-NEXT:    v_dual_cndmask_b32 v0, 0, v4 :: v_dual_cndmask_b32 v1, 0, v5
 ; GFX11-NEXT:    global_store_b64 v[2:3], v[6:7], off
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 entry:
@@ -1981,11 +1983,11 @@ define half @safe_math_fract_f16(half %x, ptr addrspace(1) nocapture writeonly %
 ; GFX8-LABEL: safe_math_fract_f16:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v5, 0x204
+; GFX8-NEXT:    s_movk_i32 s4, 0x7c00
 ; GFX8-NEXT:    v_fract_f16_e32 v4, v0
-; GFX8-NEXT:    v_cmp_class_f16_e32 vcc, v0, v5
+; GFX8-NEXT:    v_cmp_neq_f16_e64 vcc, |v0|, s4
 ; GFX8-NEXT:    v_floor_f16_e32 v3, v0
-; GFX8-NEXT:    v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
 ; GFX8-NEXT:    global_store_short v[1:2], v3, off
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
@@ -1994,10 +1996,10 @@ define half @safe_math_fract_f16(half %x, ptr addrspace(1) nocapture writeonly %
 ; GFX11:       ; %bb.0: ; %entry
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_fract_f16_e32 v3, v0
-; GFX11-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x204
+; GFX11-NEXT:    v_cmp_neq_f16_e64 vcc_lo, 0x7c00, |v0|
 ; GFX11-NEXT:    v_floor_f16_e32 v4, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, v3, 0, s0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc_lo
 ; GFX11-NEXT:    global_store_b16 v[1:2], v4, off
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 entry:

>From b4ddbe6c0463ac0ca42fdc68a11f8d9859f360bd Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Thu, 14 Mar 2024 16:19:03 +0800
Subject: [PATCH 4/4] fixup! [CodeGenPrepare] Reverse the canonicalization of
 isInf/isNanOrInf

---
 llvm/lib/CodeGen/CodeGenPrepare.cpp | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 447c26efe092ed..47f7981a240882 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1949,6 +1949,13 @@ static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
   if (!FCmp)
     return false;
 
+  // Don't fold if the target offers free fabs and the predicate is legal.
+  EVT VT = TLI.getValueType(DL, Cmp->getOperand(0)->getType());
+  if (TLI.isFAbsFree(VT) &&
+      TLI.isCondCodeLegal(getFCmpCondCode(FCmp->getPredicate()),
+                          VT.getSimpleVT()))
+    return false;
+
   // Reverse the canonicalization if it is a FP class test
   auto ShouldReverseTransform = [](FPClassTest ClassTest) {
     return ClassTest == fcInf || ClassTest == (fcInf | fcNan);
@@ -1962,10 +1969,6 @@ static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
   if (!ShouldReverseTransform(ClassTest) && !ShouldReverseTransform(~ClassTest))
     return false;
 
-  // Don't fold if the target offers free fabs.
-  if (TLI.isFAbsFree(TLI.getValueType(DL, ClassVal->getType())))
-    return false;
-
   IRBuilder<> Builder(Cmp);
   Value *IsFPClass = Builder.createIsFPClass(ClassVal, ClassTest);
   Cmp->replaceAllUsesWith(IsFPClass);



More information about the llvm-commits mailing list