[llvm] Revert "[ARM] Stop gluing FP comparisons to FMSTAT" (PR #117175)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 21 07:21:49 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-arm
Author: Sergei Barannikov (s-barannikov)
<details>
<summary>Changes</summary>
Reverts llvm/llvm-project#116676
Reverting per post-commit feedback: the original change causes miscompilations and/or assertion failures.
---
Patch is 350.98 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/117175.diff
17 Files Affected:
- (modified) llvm/lib/Target/ARM/ARMISelLowering.cpp (+15-11)
- (modified) llvm/lib/Target/ARM/ARMInstrVFP.td (+19-37)
- (modified) llvm/lib/Target/ARM/ARMRegisterInfo.td (+1-3)
- (modified) llvm/test/CodeGen/ARM/fcmp-xo.ll (+5-5)
- (modified) llvm/test/CodeGen/ARM/fp16-instructions.ll (+29-29)
- (modified) llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll (+16-16)
- (modified) llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll (+538-191)
- (modified) llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll (+319-101)
- (modified) llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll (+47-47)
- (modified) llvm/test/CodeGen/ARM/select.ll (+10-10)
- (modified) llvm/test/CodeGen/Thumb2/mve-fmas.ll (+58-58)
- (modified) llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll (+2163-1350)
- (modified) llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll (+1249-714)
- (modified) llvm/test/CodeGen/Thumb2/mve-pred-ext.ll (+14-14)
- (modified) llvm/test/CodeGen/Thumb2/mve-vcmpf.ll (+27-23)
- (modified) llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll (+44-36)
- (modified) llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll (+46-38)
``````````diff
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 6b290135c5bcba..84b37ae6833aed 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -4971,14 +4971,14 @@ SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
SelectionDAG &DAG, const SDLoc &dl,
bool Signaling) const {
assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
- SDValue Flags;
+ SDValue Cmp;
if (!isFloatingPointZero(RHS))
- Flags = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP, dl, FlagsVT,
- LHS, RHS);
+ Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP,
+ dl, MVT::Glue, LHS, RHS);
else
- Flags = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0, dl,
- FlagsVT, LHS);
- return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Flags);
+ Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0,
+ dl, MVT::Glue, LHS);
+ return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
}
/// duplicateCmp - Glue values can have only one use, so this function
@@ -4991,11 +4991,15 @@ ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
- SDValue Flags = Cmp.getOperand(0);
- assert((Flags.getOpcode() == ARMISD::CMPFP ||
- Flags.getOpcode() == ARMISD::CMPFPw0) &&
- "unexpected operand of FMSTAT");
- return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Flags);
+ Cmp = Cmp.getOperand(0);
+ Opc = Cmp.getOpcode();
+ if (Opc == ARMISD::CMPFP)
+ Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
+ else {
+ assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
+ Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
+ }
+ return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
}
// This function returns three things: the arithmetic computation itself
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index a29753909ea992..5b49f728ebb8d8 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -10,17 +10,7 @@
//
//===----------------------------------------------------------------------===//
-def SDT_CMPFP : SDTypeProfile<1, 2, [
- SDTCisVT<0, FlagsVT>, // out flags
- SDTCisFP<1>, // lhs
- SDTCisSameAs<2, 1> // rhs
-]>;
-
-def SDT_CMPFP0 : SDTypeProfile<1, 1, [
- SDTCisVT<0, FlagsVT>, // out flags
- SDTCisFP<1> // operand
-]>;
-
+def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
SDTCisSameAs<1, 2>]>;
def SDT_VMOVRRD : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
@@ -28,18 +18,11 @@ def SDT_VMOVRRD : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
def SDT_VMOVSR : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i32>]>;
-def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_CMPFP>;
-def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0>;
-def arm_cmpfpe : SDNode<"ARMISD::CMPFPE", SDT_CMPFP>;
-def arm_cmpfpe0 : SDNode<"ARMISD::CMPFPEw0", SDT_CMPFP0>;
-
-def arm_fmstat : SDNode<"ARMISD::FMSTAT",
- SDTypeProfile<0, 1, [
- SDTCisVT<0, FlagsVT> // in flags
- ]>,
- [SDNPOutGlue] // TODO: Change Glue to a normal result.
->;
-
+def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>;
+def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>;
+def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>;
+def arm_cmpfpe : SDNode<"ARMISD::CMPFPE", SDT_ARMCmp, [SDNPOutGlue]>;
+def arm_cmpfpe0: SDNode<"ARMISD::CMPFPEw0",SDT_CMPFP0, [SDNPOutGlue]>;
def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
def arm_fmrrd : SDNode<"ARMISD::VMOVRRD", SDT_VMOVRRD>;
def arm_vmovsr : SDNode<"ARMISD::VMOVSR", SDT_VMOVSR>;
@@ -623,12 +606,12 @@ let Defs = [FPSCR_NZCV] in {
def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins DPR:$Dd, DPR:$Dm),
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm", "",
- [(set FPSCR_NZCV, (arm_cmpfpe DPR:$Dd, (f64 DPR:$Dm)))]>;
+ [(arm_cmpfpe DPR:$Dd, (f64 DPR:$Dm))]>;
def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins SPR:$Sd, SPR:$Sm),
IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm", "",
- [(set FPSCR_NZCV, (arm_cmpfpe SPR:$Sd, SPR:$Sm))]> {
+ [(arm_cmpfpe SPR:$Sd, SPR:$Sm)]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -637,17 +620,17 @@ def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
def VCMPEH : AHuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins HPR:$Sd, HPR:$Sm),
IIC_fpCMP16, "vcmpe", ".f16\t$Sd, $Sm",
- [(set FPSCR_NZCV, (arm_cmpfpe (f16 HPR:$Sd), (f16 HPR:$Sm)))]>;
+ [(arm_cmpfpe (f16 HPR:$Sd), (f16 HPR:$Sm))]>;
def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins DPR:$Dd, DPR:$Dm),
IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm", "",
- [(set FPSCR_NZCV, (arm_cmpfp DPR:$Dd, (f64 DPR:$Dm)))]>;
+ [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>;
def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins SPR:$Sd, SPR:$Sm),
IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm", "",
- [(set FPSCR_NZCV, (arm_cmpfp SPR:$Sd, SPR:$Sm))]> {
+ [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -656,7 +639,7 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
def VCMPH : AHuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins HPR:$Sd, HPR:$Sm),
IIC_fpCMP16, "vcmp", ".f16\t$Sd, $Sm",
- [(set FPSCR_NZCV, (arm_cmpfp (f16 HPR:$Sd), (f16 HPR:$Sm)))]>;
+ [(arm_cmpfp (f16 HPR:$Sd), (f16 HPR:$Sm))]>;
} // Defs = [FPSCR_NZCV]
//===----------------------------------------------------------------------===//
@@ -686,7 +669,7 @@ let Defs = [FPSCR_NZCV] in {
def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins DPR:$Dd),
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0", "",
- [(set FPSCR_NZCV, (arm_cmpfpe0 (f64 DPR:$Dd)))]> {
+ [(arm_cmpfpe0 (f64 DPR:$Dd))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}
@@ -694,7 +677,7 @@ def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins SPR:$Sd),
IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0", "",
- [(set FPSCR_NZCV, (arm_cmpfpe0 SPR:$Sd))]> {
+ [(arm_cmpfpe0 SPR:$Sd)]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
@@ -706,7 +689,7 @@ def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins HPR:$Sd),
IIC_fpCMP16, "vcmpe", ".f16\t$Sd, #0",
- [(set FPSCR_NZCV, (arm_cmpfpe0 (f16 HPR:$Sd)))]> {
+ [(arm_cmpfpe0 (f16 HPR:$Sd))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}
@@ -714,7 +697,7 @@ def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0,
def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
(outs), (ins DPR:$Dd),
IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0", "",
- [(set FPSCR_NZCV, (arm_cmpfp0 (f64 DPR:$Dd)))]> {
+ [(arm_cmpfp0 (f64 DPR:$Dd))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}
@@ -722,7 +705,7 @@ def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
(outs), (ins SPR:$Sd),
IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0", "",
- [(set FPSCR_NZCV, (arm_cmpfp0 SPR:$Sd))]> {
+ [(arm_cmpfp0 SPR:$Sd)]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
@@ -734,7 +717,7 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
def VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0,
(outs), (ins HPR:$Sd),
IIC_fpCMP16, "vcmp", ".f16\t$Sd, #0",
- [(set FPSCR_NZCV, (arm_cmpfp0 (f16 HPR:$Sd)))]> {
+ [(arm_cmpfp0 (f16 HPR:$Sd))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}
@@ -2509,8 +2492,7 @@ let DecoderMethod = "DecodeForVMRSandVMSR" in {
let Defs = [CPSR], Uses = [FPSCR_NZCV], Predicates = [HasFPRegs],
Rt = 0b1111 /* apsr_nzcv */ in
def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
- "vmrs", "\tAPSR_nzcv, fpscr",
- [(arm_fmstat FPSCR_NZCV)]>;
+ "vmrs", "\tAPSR_nzcv, fpscr", [(arm_fmstat)]>;
// Application level FPSCR -> GPR
let hasSideEffects = 1, Uses = [FPSCR], Predicates = [HasFPRegs] in
diff --git a/llvm/lib/Target/ARM/ARMRegisterInfo.td b/llvm/lib/Target/ARM/ARMRegisterInfo.td
index f5a675e2976bb7..f37d0fe542b4f7 100644
--- a/llvm/lib/Target/ARM/ARMRegisterInfo.td
+++ b/llvm/lib/Target/ARM/ARMRegisterInfo.td
@@ -413,9 +413,7 @@ def VCCR : RegisterClass<"ARM", [i32, v16i1, v8i1, v4i1, v2i1], 32, (add VPR)> {
// FPSCR, when the flags at the top of it are used as the input or
// output to an instruction such as MVE VADC.
-def cl_FPSCR_NZCV : RegisterClass<"ARM", [i32], 32, (add FPSCR_NZCV)> {
- let CopyCost = -1;
-}
+def cl_FPSCR_NZCV : RegisterClass<"ARM", [i32], 32, (add FPSCR_NZCV)>;
// Scalar single precision floating point register class..
// FIXME: Allocation order changed to s0, s2, ... or s0, s4, ... as a quick hack
diff --git a/llvm/test/CodeGen/ARM/fcmp-xo.ll b/llvm/test/CodeGen/ARM/fcmp-xo.ll
index 908dbd7a11a6b6..3d5972f065859f 100644
--- a/llvm/test/CodeGen/ARM/fcmp-xo.ll
+++ b/llvm/test/CodeGen/ARM/fcmp-xo.ll
@@ -54,12 +54,12 @@ define arm_aapcs_vfpcc float @float128(float %a0) local_unnamed_addr {
; NEON-LABEL: float128:
; NEON: @ %bb.0:
; NEON-NEXT: mov.w r0, #1124073472
-; NEON-NEXT: vmov.f32 s4, #5.000000e-01
-; NEON-NEXT: vmov d1, r0, r0
-; NEON-NEXT: vmov.f32 s6, #-5.000000e-01
-; NEON-NEXT: vcmp.f32 s2, s0
+; NEON-NEXT: vmov.f32 s2, #5.000000e-01
+; NEON-NEXT: vmov d3, r0, r0
+; NEON-NEXT: vmov.f32 s4, #-5.000000e-01
+; NEON-NEXT: vcmp.f32 s6, s0
; NEON-NEXT: vmrs APSR_nzcv, fpscr
-; NEON-NEXT: vselgt.f32 s0, s6, s4
+; NEON-NEXT: vselgt.f32 s0, s4, s2
; NEON-NEXT: bx lr
%1 = fcmp nsz olt float %a0, 128.000000e+00
%2 = select i1 %1, float -5.000000e-01, float 5.000000e-01
diff --git a/llvm/test/CodeGen/ARM/fp16-instructions.ll b/llvm/test/CodeGen/ARM/fp16-instructions.ll
index 7a1d5ddfa301b6..1988cb1d2f9039 100644
--- a/llvm/test/CodeGen/ARM/fp16-instructions.ll
+++ b/llvm/test/CodeGen/ARM/fp16-instructions.ll
@@ -700,9 +700,9 @@ define half @select_cc1(ptr %a0) {
; CHECK-LABEL: select_cc1:
-; CHECK-HARDFP-FULLFP16: vcmp.f16
-; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
-; CHECK-HARDFP-FULLFP16: vseleq.f16 s0,
+; CHECK-HARDFP-FULLFP16: vcmp.f16
+; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-HARDFP-FULLFP16-NEXT: vseleq.f16 s0,
; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
@@ -728,9 +728,9 @@ define half @select_cc_ge1(ptr %a0) {
; CHECK-LABEL: select_cc_ge1:
-; CHECK-HARDFP-FULLFP16: vcmp.f16
-; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
-; CHECK-HARDFP-FULLFP16: vselge.f16 s0,
+; CHECK-HARDFP-FULLFP16: vcmp.f16
+; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0,
; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
@@ -751,9 +751,9 @@ define half @select_cc_ge2(ptr %a0) {
; CHECK-LABEL: select_cc_ge2:
-; CHECK-HARDFP-FULLFP16: vcmp.f16
-; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
-; CHECK-HARDFP-FULLFP16: vselge.f16 s0,
+; CHECK-HARDFP-FULLFP16: vcmp.f16
+; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0,
; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
@@ -774,9 +774,9 @@ define half @select_cc_ge3(ptr %a0) {
; CHECK-LABEL: select_cc_ge3:
-; CHECK-HARDFP-FULLFP16: vcmp.f16
-; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
-; CHECK-HARDFP-FULLFP16: vselge.f16 s0,
+; CHECK-HARDFP-FULLFP16: vcmp.f16
+; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0,
; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
@@ -797,9 +797,9 @@ define half @select_cc_ge4(ptr %a0) {
; CHECK-LABEL: select_cc_ge4:
-; CHECK-HARDFP-FULLFP16: vcmp.f16
-; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
-; CHECK-HARDFP-FULLFP16: vselge.f16 s0, s{{.}}, s{{.}}
+; CHECK-HARDFP-FULLFP16: vcmp.f16
+; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
@@ -821,9 +821,9 @@ define half @select_cc_gt1(ptr %a0) {
; CHECK-LABEL: select_cc_gt1:
-; CHECK-HARDFP-FULLFP16: vcmp.f16
-; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
-; CHECK-HARDFP-FULLFP16: vselgt.f16 s0, s{{.}}, s{{.}}
+; CHECK-HARDFP-FULLFP16: vcmp.f16
+; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
@@ -844,9 +844,9 @@ define half @select_cc_gt2(ptr %a0) {
; CHECK-LABEL: select_cc_gt2:
-; CHECK-HARDFP-FULLFP16: vcmp.f16
-; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
-; CHECK-HARDFP-FULLFP16: vselgt.f16 s0, s{{.}}, s{{.}}
+; CHECK-HARDFP-FULLFP16: vcmp.f16
+; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
@@ -867,9 +867,9 @@ define half @select_cc_gt3(ptr %a0) {
; CHECK-LABEL: select_cc_gt3:
-; CHECK-HARDFP-FULLFP16: vcmp.f16
-; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
-; CHECK-HARDFP-FULLFP16: vselgt.f16 s0, s{{.}}, s{{.}}
+; CHECK-HARDFP-FULLFP16: vcmp.f16
+; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
@@ -890,9 +890,9 @@ define half @select_cc_gt4(ptr %a0) {
; CHECK-LABEL: select_cc_gt4:
-; CHECK-HARDFP-FULLFP16: vcmp.f16
-; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
-; CHECK-HARDFP-FULLFP16: vselgt.f16 s0, s{{.}}, s{{.}}
+; CHECK-HARDFP-FULLFP16: vcmp.f16
+; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16-A32: vcmp.f32
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
@@ -923,10 +923,10 @@ entry:
; CHECK-LABEL: select_cc4:
; CHECK-HARDFP-FULLFP16: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}}
-; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, [[S2]]
; CHECK-HARDFP-FULLFP16: vldr.16 [[S4:s[0-9]]], .LCPI{{.*}}
-; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16: vmov.f16 [[S6:s[0-9]]], #-2.000000e+00
+; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, [[S2]]
+; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vseleq.f16 [[S0:s[0-9]]], [[S6]], [[S4]]
; CHECK-HARDFP-FULLFP16-NEXT: vselvs.f16 s0, [[S6]], [[S0]]
diff --git a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
index 996b46c51ab361..56e734c4404336 100644
--- a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
+++ b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
@@ -5,11 +5,11 @@
define half @fp16_vminnm_o(half %a, half %b) {
; CHECK-LABEL: fp16_vminnm_o:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f16 s0, r0
-; CHECK-NEXT: vmov.f16 s2, r1
-; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmov.f16 s0, r1
+; CHECK-NEXT: vmov.f16 s2, r0
+; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
entry:
@@ -37,11 +37,11 @@ entry:
define half @fp16_vminnm_u(half %a, half %b) {
; CHECK-LABEL: fp16_vminnm_u:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f16 s0, r1
-; CHECK-NEXT: vmov.f16 s2, r0
-; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmov.f16 s0, r0
+; CHECK-NEXT: vmov.f16 s2, r1
+; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
entry:
@@ -53,11 +53,11 @@ entry:
define half @fp16_vminnm_ule(half %a, half %b) {
; CHECK-LABEL: fp16_vminnm_ule:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f16 s0, r1
-; CHECK-NEXT: vmov.f16 s2, r0
-; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmov.f16 s0, r0
+; CHECK-NEXT: vmov.f16 s2, r1
+; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
entry:
@@ -69,11 +69,11 @@ entry:
define half @fp16_vminnm_u_rev(half %a, half %b) {
; CHECK-LABEL: fp16_vminnm_u_rev:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f16 s0, r0
-; CHECK-NEXT: vmov.f16 s2, r1
-; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmov.f16 s0, r1
+; CHECK-NEXT: vmov.f16 s2, r0
+; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
entry:
diff --git a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
index 84f6ee276ba5f1..4b27e804e6df9a 100644
--- a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
@@ -258,11 +258,11 @@ define i13 @test_signed_i13_f32(float %f) nounwind {
; VFP2: @ %bb.0:
; VFP2-NEXT: vmov s0, r0
; VFP2-NEXT: vldr s2, .LCPI2_0
-; VFP2-NEXT: vldr s6, .LCPI2_1
; VFP2-NEXT: vcvt.s32.f32 s4, s0
; VFP2-NEXT: vcmp.f32 s0, s2
+; VFP2-NEXT: vldr s2, .LCPI2_1
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s6
+; VFP2-NEXT: vcmp.f32 s0, s2
; VFP2-NEXT: vmov r0, s4
; VFP2-NEXT: itt lt
; VFP2-NEXT: movwlt r0, #61440
@@ -358,11 +358,11 @@ define i16 @test_signed_i16_f32(float %f) nounwind {
; VFP2: @ %bb.0:
; VFP2-NEXT: vmov s0, r0
; VFP2-NEXT: vldr s2, .LCPI3_0
-; VFP2-NEXT: vldr s6, .LCPI3_1
; VFP2-NEXT: vcvt.s32.f32 s4, s0
; VFP2-NEXT: vcmp.f32 s0, s2
+; VFP2-NEXT: vldr s2, .LCPI3_1
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s6
+; VFP2-NEXT: vcmp.f32 s0, s2
; VFP2-NEXT: vmov r0, s4
; VFP2-NEXT: itt lt
; VFP2-NEXT: movwlt r0, #32768
@@ -458,11 +458,11 @@ define i19 @test_signed_i19_f32(float %f) nounwind {
; VFP2: @ %bb.0:
; VFP2-NEXT: vmov s0, r0
; VFP2-NEXT: vldr s2, .LCPI4_0
-; VFP2-NEXT: vldr s6, .LCPI4_1
; VFP2-NEXT: vcvt.s32.f32 s4, s0
; VFP2-NEXT: vcmp.f32 s0, s2
+; VFP2-NEXT: vldr s2, .LCPI4_1
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s6
+; VFP2-NEXT: vcmp.f32 s0, s2
; VFP2-N...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/117175
More information about the llvm-commits mailing list