[llvm] [SystemZ] Don't use FP Load and Test as comparisons to same reg (PR #78074)

Sat Jan 13 15:52:29 PST 2024

https://github.com/JonPsson1 created https://github.com/llvm/llvm-project/pull/78074

The usage of FP Load and Test instructions as a comparison against zero with
the assumption that the dest reg will always reflect the source reg is actually
incorrect. Unfortunately, a SNaN will be converted to a QNaN, so the instruction
may actually change the value as opposed to being a pure register move with a
test.

This patch
- Changes instruction selection to always emit FP LT with a scratch def reg, which
   will typically be allocated to the same reg as the source if the source is dead
   after the comparison.
- Removes the conversions into FP LT in SystemZElimCompare.

The overall impact of this on benchmarks is very limited, just some ~50 more
COPYs in total.

I tried checking for a single use on zEC12 and, in those cases, emitting the compare
form that has two use operands instead of def/use. However, when I removed that check
again I saw that it had basically no impact (just some ~300 LTs using a scratch reg,
but no more spilling/copying). Therefore, it was removed entirely as a simplification.

I tried removing the "forward scan", but that is still valuable with some ~7k L/LG
transformations.

The one open question I am not sure about is the **NoFPExcept** flag. The PoP
(Principles of Operation) says that an "exception is recognized". Since the LT may
actually raise an exception, it should not have the NoFPExcept flag. On the other hand,
that flag itself should not be there unless the input value is guaranteed not to be a NaN?

(In emitLoadAndTestCmp0, I first cleared that flag, but then realized that adjustCCMasksForInstr()
in SystemZElimCompare will then bail from the transformation because of this.)


>From 748b15ea29581bbd3778398762b9e60a625bec1f Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Thu, 11 Jan 2024 16:18:17 -0600
Subject: [PATCH 1/2] IP

---
 .../lib/Target/SystemZ/SystemZElimCompare.cpp |  8 +-
 .../Target/SystemZ/SystemZISelLowering.cpp    | 25 +++--
 llvm/lib/Target/SystemZ/SystemZISelLowering.h |  3 +-
 llvm/lib/Target/SystemZ/SystemZInstrFP.td     | 33 +++----
 .../lib/Target/SystemZ/SystemZInstrFormats.td |  3 +-
 llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp  |  3 -
 llvm/test/CodeGen/SystemZ/fp-cmp-04.ll        | 31 +++---
 llvm/test/CodeGen/SystemZ/fp-cmp-07.mir       | 44 ---------
 llvm/test/CodeGen/SystemZ/fp-cmp-zero.ll      | 94 +++++++++++++++++++
 llvm/test/CodeGen/SystemZ/fp-strict-cmp-04.ll | 41 ++++----
 llvm/test/CodeGen/SystemZ/tdc-06.ll           |  2 +-
 llvm/test/CodeGen/SystemZ/vec-max-05.ll       |  3 +-
 llvm/test/CodeGen/SystemZ/vec-min-05.ll       |  3 +-
 13 files changed, 173 insertions(+), 120 deletions(-)
 delete mode 100644 llvm/test/CodeGen/SystemZ/fp-cmp-07.mir
 create mode 100644 llvm/test/CodeGen/SystemZ/fp-cmp-zero.ll

diff --git a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
index 340dba1362aff1..0991400af1c276 100644
--- a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -115,12 +115,6 @@ static bool preservesValueOf(MachineInstr &MI, unsigned Reg) {
   case SystemZ::LTR:
   case SystemZ::LTGR:
   case SystemZ::LTGFR:
-  case SystemZ::LER:
-  case SystemZ::LDR:
-  case SystemZ::LXR:
-  case SystemZ::LTEBR:
-  case SystemZ::LTDBR:
-  case SystemZ::LTXBR:
     if (MI.getOperand(1).getReg() == Reg)
       return true;
   }
@@ -569,7 +563,7 @@ bool SystemZElimCompare::optimizeCompareZero(
 
   // Also do a forward search to handle cases where an instruction after the
   // compare can be converted, like
-  // LTEBRCompare %f0s, %f0s; %f2s = LER %f0s  =>  LTEBRCompare %f2s, %f0s
+  // LTGR %r0d, %r0d; %r1d = LGR %r0d  =>  LTGR %r1d, %r0d          XXXX
   auto MIRange = llvm::make_range(
       std::next(MachineBasicBlock::iterator(&Compare)), MBB.end());
   for (MachineInstr &MI : llvm::make_early_inc_range(MIRange)) {
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 2450c6801a6632..1e1dd65de25967 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -9175,14 +9175,23 @@ MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
 }
 
 MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
-    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
+    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
+    unsigned CmpOpcode) const {
   MachineFunction &MF = *MBB->getParent();
   MachineRegisterInfo *MRI = &MF.getRegInfo();
   const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
   DebugLoc DL = MI.getDebugLoc();
-
   Register SrcReg = MI.getOperand(0).getReg();
 
+  // This instruction will raise an exception if the input is a SNaN.
+  //  MI.clearFlag(MachineInstr::MIFlag::NoFPExcept);
+
+  // XXX Worthwile?  Regallochints for dead def instead?
+  if (!Subtarget.hasVector() && MRI->hasOneNonDBGUser(SrcReg)) {
+    MI.setDesc(TII->get(CmpOpcode));
+    return MBB;
+  }
+
   // Create new virtual register of the same class as source.
   const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
   Register DstReg = MRI->createVirtualRegister(RC);
@@ -9431,12 +9440,12 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
     return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
   case SystemZ::TBEGINC:
     return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
-  case SystemZ::LTEBRCompare_VecPseudo:
-    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
-  case SystemZ::LTDBRCompare_VecPseudo:
-    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
-  case SystemZ::LTXBRCompare_VecPseudo:
-    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
+  case SystemZ::LTEBRCompare_Pseudo:
+    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR, SystemZ::LTEBRCompare);
+  case SystemZ::LTDBRCompare_Pseudo:
+    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR, SystemZ::LTDBRCompare);
+  case SystemZ::LTXBRCompare_Pseudo:
+    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR, SystemZ::LTXBRCompare);
 
   case SystemZ::PROBED_ALLOCA:
     return emitProbedAlloca(MI, MBB);
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index baf4ba41654879..5067de5a7c74d5 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -785,7 +785,8 @@ class SystemZTargetLowering : public TargetLowering {
                                           unsigned Opcode, bool NoFloat) const;
   MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr &MI,
                                          MachineBasicBlock *MBB,
-                                         unsigned Opcode) const;
+                                         unsigned Opcode,
+                                         unsigned CmpOpcode) const;
   MachineBasicBlock *emitProbedAlloca(MachineInstr &MI,
                                       MachineBasicBlock *MBB) const;
 
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index ea62e99a58399c..74161c0db533e1 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -51,36 +51,29 @@ let isCodeGenOnly = 1 in
   def LDR32 : UnaryRR<"ldr", 0x28, null_frag, FP32, FP32>;
 
 // Moves between two floating-point registers that also set the condition
-// codes.
+// codes. Note that these instructions will turn SNaNs into QNaNs and should
+// not be used for comparison if the result will be used afterwards.
 let Uses = [FPC], mayRaiseFPException = 1,
     Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
   defm LTEBR : LoadAndTestRRE<"ltebr", 0xB302, FP32>;
   defm LTDBR : LoadAndTestRRE<"ltdbr", 0xB312, FP64>;
   defm LTXBR : LoadAndTestRRE<"ltxbr", 0xB342, FP128>;
 }
-// Note that LTxBRCompare is not available if we have vector support,
-// since load-and-test instructions will partially clobber the target
-// (vector) register.
-let Predicates = [FeatureNoVector] in {
-  defm : CompareZeroFP<LTEBRCompare, FP32>;
-  defm : CompareZeroFP<LTDBRCompare, FP64>;
-  defm : CompareZeroFP<LTXBRCompare, FP128>;
-}
 
-// Use a normal load-and-test for compare against zero in case of
-// vector support (via a pseudo to simplify instruction selection).
+// Use a normal load-and-test for compare against zero in case of vector
+// support (via a pseudo to simplify instruction selection). For a non-vector
+// subtarget these could be expanded to the "Compare" version which does not
+// clobber an extra phys-reg, but only if the result is not live. XXX Compare worthwhile?
 let Uses = [FPC], mayRaiseFPException = 1,
     Defs = [CC], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
-  def LTEBRCompare_VecPseudo : Pseudo<(outs), (ins FP32:$R1, FP32:$R2), []>;
-  def LTDBRCompare_VecPseudo : Pseudo<(outs), (ins FP64:$R1, FP64:$R2), []>;
-  def LTXBRCompare_VecPseudo : Pseudo<(outs), (ins FP128:$R1, FP128:$R2), []>;
-}
-let Predicates = [FeatureVector] in {
-  defm : CompareZeroFP<LTEBRCompare_VecPseudo, FP32>;
-  defm : CompareZeroFP<LTDBRCompare_VecPseudo, FP64>;
+  def LTEBRCompare_Pseudo : Pseudo<(outs), (ins FP32:$R1, FP32:$R2), []>;
+  def LTDBRCompare_Pseudo : Pseudo<(outs), (ins FP64:$R1, FP64:$R2), []>;
+  def LTXBRCompare_Pseudo : Pseudo<(outs), (ins FP128:$R1, FP128:$R2), []>;
 }
-let Predicates = [FeatureVector, FeatureNoVectorEnhancements1] in
-  defm : CompareZeroFP<LTXBRCompare_VecPseudo, FP128>;
+defm : CompareZeroFP<LTEBRCompare_Pseudo, FP32>;
+defm : CompareZeroFP<LTDBRCompare_Pseudo, FP64>;
+let Predicates = [FeatureNoVectorEnhancements1] in
+  defm : CompareZeroFP<LTXBRCompare_Pseudo, FP128>;
 
 // Moves between 64-bit integer and floating-point registers.
 def LGDR : UnaryRRE<"lgdr", 0xB3CD, bitconvert, GR64, FP64>;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
index 2e5ff4a1df673b..50b5ce5b83c391 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -5040,7 +5040,8 @@ class BranchPreloadMII<string mnemonic, bits<8> opcode>
 // operation and one that acts as a comparison against zero.
 // Note that the comparison against zero operation is not available if we
 // have vector support, since load-and-test instructions will partially
-// clobber the target (vector) register.
+// clobber the target (vector) register. It should also not be used if the
+// destination is used as it may actually be clobbered (if source is a SNaN).
 multiclass LoadAndTestRRE<string mnemonic, bits<16> opcode,
                           RegisterOperand cls> {
   def "" : UnaryRRE<mnemonic, opcode, null_frag, cls, cls>;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index ac8c395f9064fb..bf6547cc87ec5e 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -1692,9 +1692,6 @@ unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const {
   case SystemZ::LR:     return SystemZ::LTR;
   case SystemZ::LGFR:   return SystemZ::LTGFR;
   case SystemZ::LGR:    return SystemZ::LTGR;
-  case SystemZ::LER:    return SystemZ::LTEBR;
-  case SystemZ::LDR:    return SystemZ::LTDBR;
-  case SystemZ::LXR:    return SystemZ::LTXBR;
   case SystemZ::LCDFR:  return SystemZ::LCDBR;
   case SystemZ::LPDFR:  return SystemZ::LPDBR;
   case SystemZ::LNDFR:  return SystemZ::LNDBR;
diff --git a/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll b/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll
index f93b27f829f844..c1773abe92305d 100644
--- a/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll
@@ -163,7 +163,7 @@ exit:
 define float @f9(float %a, float %b, ptr %dest) {
 ; CHECK-LABEL: f9:
 ; CHECK: meebr %f0, %f2
-; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: ltebr %f1, %f0
 ; CHECK-NEXT: blhr %r14
 ; CHECK: br %r14
 entry:
@@ -185,7 +185,7 @@ define float @f10(float %a, float %b, float %c, ptr %dest) {
 ; CHECK-LABEL: f10:
 ; CHECK: aebr %f0, %f2
 ; CHECK-NEXT: debr %f0, %f4
-; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: ltebr %f1, %f0
 ; CHECK-NEXT: bner %r14
 ; CHECK: br %r14
 entry:
@@ -209,7 +209,7 @@ define float @f11(float %a, float %b, float %c, ptr %dest1, ptr %dest2) {
 ; CHECK: aebr %f0, %f2
 ; CHECK-NEXT: sebr %f4, %f0
 ; CHECK-DAG: ste %f4, 0(%r2)
-; CHECK-DAG: ltebr %f0, %f0
+; CHECK-DAG: ltebr %f1, %f0
 ; CHECK-NEXT: ber %r14
 ; CHECK: br %r14
 entry:
@@ -227,10 +227,11 @@ exit:
   ret float %add
 }
 
-; Test that LER gets converted to LTEBR where useful.
+; %val in %f2 must be preserved during comparison and also copied to %f0.
 define float @f12(float %dummy, float %val, ptr %dest) {
 ; CHECK-LABEL: f12:
-; CHECK: ltebr %f0, %f2
+; CHECK: ler %f0, %f2
+; CHECK-NEXT: ltebr %f1, %f2
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: blah %f0
 ; CHECK-NEXT: #NO_APP
@@ -249,10 +250,11 @@ exit:
   ret float %val
 }
 
-; Test that LDR gets converted to LTDBR where useful.
+; Same for double.
 define double @f13(double %dummy, double %val, ptr %dest) {
 ; CHECK-LABEL: f13:
-; CHECK: ltdbr %f0, %f2
+; CHECK: ldr %f0, %f2
+; CHECK-NEXT: ltdbr %f1, %f2
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: blah %f0
 ; CHECK-NEXT: #NO_APP
@@ -271,14 +273,15 @@ exit:
   ret double %val
 }
 
-; Test that LXR gets converted to LTXBR where useful.
+; LXR cannot be converted to LTXBR as its input is live after it.
 define void @f14(ptr %ptr1, ptr %ptr2) {
 ; CHECK-LABEL: f14:
-; CHECK: ltxbr
+; CHECK: lxr
 ; CHECK-NEXT: dxbr
 ; CHECK-NEXT: std
 ; CHECK-NEXT: std
 ; CHECK-NEXT: mxbr
+; CHECK-NEXT: ltxbr
 ; CHECK-NEXT: std
 ; CHECK-NEXT: std
 ; CHECK-NEXT: blr %r14
@@ -301,11 +304,10 @@ exit:
   ret void
 }
 
-; Test a case where it is the source rather than destination of LER that
-; we need.
 define float @f15(float %val, float %dummy, ptr %dest) {
 ; CHECK-LABEL: f15:
-; CHECK: ltebr %f2, %f0
+; CHECK: ltebr %f1, %f0
+; CHECK-NEXT: ler %f2, %f0
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: blah %f2
 ; CHECK-NEXT: #NO_APP
@@ -324,11 +326,10 @@ exit:
   ret float %val
 }
 
-; Test a case where it is the source rather than destination of LDR that
-; we need.
 define double @f16(double %val, double %dummy, ptr %dest) {
 ; CHECK-LABEL: f16:
-; CHECK: ltdbr %f2, %f0
+; CHECK: ltdbr %f1, %f0
+; CHECK: ldr %f2, %f0
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: blah %f2
 ; CHECK-NEXT: #NO_APP
diff --git a/llvm/test/CodeGen/SystemZ/fp-cmp-07.mir b/llvm/test/CodeGen/SystemZ/fp-cmp-07.mir
deleted file mode 100644
index 63b9a3a4c9f022..00000000000000
--- a/llvm/test/CodeGen/SystemZ/fp-cmp-07.mir
+++ /dev/null
@@ -1,44 +0,0 @@
-# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z10 -no-integrated-as -start-after=block-placement %s -o - | FileCheck %s
-# Test that LTEBR is used without an unnecessary LER
-
---- |
-  define float @f15(float %val, float %dummy, float* %dest) {
-  entry:
-    call void asm sideeffect "blah $0", "{f2}"(float %val)
-    %cmp = fcmp olt float %val, 0.000000e+00
-    br i1 %cmp, label %exit, label %store
-
-  store:                                            ; preds = %entry
-    store float %val, float* %dest
-    br label %exit
-
-  exit:                                             ; preds = %store, %entry
-    ret float %val
-  }
-
-...
-
-# CHECK: ltebr %f2, %f0
-
----
-name:            f15
-tracksRegLiveness: true
-liveins:
-  - { reg: '$f0s', virtual-reg: '' }
-  - { reg: '$r2d', virtual-reg: '' }
-body:             |
-  bb.0.entry:
-    liveins: $f0s, $r2d
-
-    LTEBRCompare $f0s, $f0s, implicit-def $cc, implicit $fpc
-    $f2s = LER $f0s
-    INLINEASM &"blah $0", 1, 9, $f2s
-    CondReturn 15, 4, implicit $f0s, implicit $cc
-
-  bb.1.store:
-    liveins: $f0s, $r2d
-
-    STE $f0s, killed $r2d, 0, $noreg :: (store (s32) into %ir.dest)
-    Return implicit $f0s
-
-...
diff --git a/llvm/test/CodeGen/SystemZ/fp-cmp-zero.ll b/llvm/test/CodeGen/SystemZ/fp-cmp-zero.ll
new file mode 100644
index 00000000000000..01318f3cf119a8
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/fp-cmp-zero.ll
@@ -0,0 +1,94 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Check comparisons with zero. If the tested value is live after the
+; comparison, load and test cannot be used to the same register.
+
+; Compared value is used afterwards.
+define i64 @f1(i64 %a, i64 %b, float %V, ptr %dst) {
+; CHECK-LABEL: f1:
+; CHECK: ltebr %f1, %f0
+  %cond = fcmp oeq float %V, 0.0
+  %res = select i1 %cond, i64 %a, i64 %b
+  store volatile float %V, ptr %dst
+  ret i64 %res
+}
+
+define i64 @f1m(i64 %a, i64 %b, float %V, ptr %dst) {
+; CHECK-LABEL: f1m:
+; CHECK: ltebr %f1, %f0
+  %cond = fcmp oeq float %V, -0.0
+  %res = select i1 %cond, i64 %a, i64 %b
+  store volatile float %V, ptr %dst
+  ret i64 %res
+}
+
+; Value only used in comparison.
+define i64 @f2(i64 %a, i64 %b, float %V) {
+; CHECK-LABEL: f2:
+; CHECK: ltebr %f0, %f0
+  %cond = fcmp oeq float %V, 0.0
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+define i64 @f2m(i64 %a, i64 %b, float %V) {
+; CHECK-LABEL: f2m:
+; CHECK: ltebr %f0, %f0
+  %cond = fcmp oeq float %V, -0.0
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; Same for double
+define i64 @f3(i64 %a, i64 %b, double %V, ptr %dst) {
+; CHECK-LABEL: f3:
+; CHECK: ltdbr %f1, %f0
+  %cond = fcmp oeq double %V, 0.0
+  %res = select i1 %cond, i64 %a, i64 %b
+  store volatile double %V, ptr %dst
+  ret i64 %res
+}
+
+define i64 @f3m(i64 %a, i64 %b, double %V, ptr %dst) {
+; CHECK-LABEL: f3m:
+; CHECK: ltdbr %f1, %f0
+  %cond = fcmp oeq double %V, -0.0
+  %res = select i1 %cond, i64 %a, i64 %b
+  store volatile double %V, ptr %dst
+  ret i64 %res
+}
+
+define i64 @f4(i64 %a, i64 %b, double %V) {
+; CHECK-LABEL: f4:
+; CHECK: ltdbr %f0, %f0
+  %cond = fcmp oeq double %V, 0.0
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+define i64 @f4m(i64 %a, i64 %b, double %V) {
+; CHECK-LABEL: f4m:
+; CHECK: ltdbr %f0, %f0
+  %cond = fcmp oeq double %V, -0.0
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; Same for fp128
+define i64 @f5(i64 %a, i64 %b, fp128 %V, ptr %dst) {
+; CHECK-LABEL: f5:
+; CHECK: ltxbr %f1, %f0
+  %cond = fcmp oeq fp128 %V, 0xL00000000000000008000000000000000
+  %res = select i1 %cond, i64 %a, i64 %b
+  store volatile fp128 %V, ptr %dst
+  ret i64 %res
+}
+
+define i64 @f6(i64 %a, i64 %b, fp128 %V) {
+; CHECK-LABEL: f6:
+; CHECK: ltxbr %f0, %f0
+  %cond = fcmp oeq fp128 %V, 0xL00000000000000008000000000000000
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-cmp-04.ll b/llvm/test/CodeGen/SystemZ/fp-strict-cmp-04.ll
index bc6f3f469c3a29..bf9ccbcd70550e 100644
--- a/llvm/test/CodeGen/SystemZ/fp-strict-cmp-04.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-cmp-04.ll
@@ -140,7 +140,7 @@ exit:
 define float @f6(float %dummy, float %a, ptr %dest) #0 {
 ; CHECK-LABEL: f6:
 ; CHECK: lpdfr %f0, %f2
-; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: ltebr %f1, %f0
 ; CHECK-NEXT: bhr %r14
 ; CHECK: br %r14
 entry:
@@ -163,7 +163,7 @@ exit:
 define float @f7(float %dummy, float %a, ptr %dest) #0 {
 ; CHECK-LABEL: f7:
 ; CHECK: lndfr %f0, %f2
-; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: ltebr %f1, %f0
 ; CHECK-NEXT: blr %r14
 ; CHECK: br %r14
 entry:
@@ -187,7 +187,7 @@ exit:
 define float @f8(float %dummy, float %a, ptr %dest) #0 {
 ; CHECK-LABEL: f8:
 ; CHECK: lcdfr %f0, %f2
-; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: ltebr %f1, %f0
 ; CHECK-NEXT: bler %r14
 ; CHECK: br %r14
 entry:
@@ -210,7 +210,7 @@ exit:
 define float @f9(float %a, float %b, ptr %dest) #0 {
 ; CHECK-LABEL: f9:
 ; CHECK: meebr %f0, %f2
-; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: ltebr %f1, %f0
 ; CHECK-NEXT: blhr %r14
 ; CHECK: br %r14
 entry:
@@ -238,7 +238,7 @@ define float @f10(float %a, float %b, float %c, ptr %dest) #0 {
 ; CHECK-LABEL: f10:
 ; CHECK: aebr %f0, %f2
 ; CHECK-NEXT: debr %f0, %f4
-; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: ltebr %f1, %f0
 ; CHECK-NEXT: bner %r14
 ; CHECK: br %r14
 entry:
@@ -271,7 +271,7 @@ define float @f11(float %a, float %b, float %c, ptr %dest1, ptr %dest2) #0 {
 ; CHECK: aebr %f0, %f2
 ; CHECK-NEXT: sebr %f4, %f0
 ; CHECK-DAG: ste %f4, 0(%r2)
-; CHECK-DAG: ltebr %f0, %f0
+; CHECK-DAG: ltebr %f1, %f0
 ; CHECK-NEXT: ber %r14
 ; CHECK: br %r14
 entry:
@@ -298,13 +298,14 @@ exit:
   ret float %add
 }
 
-; Test that LER gets converted to LTEBR where useful.
+; Test that LER does not get converted to LTEBR as %f0 is live after it.
 define float @f12(float %dummy, float %val) #0 {
 ; CHECK-LABEL: f12:
-; CHECK: ltebr %f0, %f2
+; CHECK: ler %f0, %f2
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: blah %f0
 ; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: ltebr %f1, %f2
 ; CHECK-NEXT: blr %r14
 ; CHECK: br %r14
 entry:
@@ -323,13 +324,14 @@ exit:
   ret float %ret
 }
 
-; Test that LDR gets converted to LTDBR where useful.
+; Test that LDR does not get converted to LTDBR as %f0 is live after it.
 define double @f13(double %dummy, double %val) #0 {
 ; CHECK-LABEL: f13:
-; CHECK: ltdbr %f0, %f2
+; CHECK: ldr %f0, %f2
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: blah %f0
 ; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: ltdbr %f1, %f2
 ; CHECK-NEXT: blr %r14
 ; CHECK: br %r14
 entry:
@@ -348,16 +350,17 @@ exit:
   ret double %ret
 }
 
-; Test that LXR gets converted to LTXBR where useful.
+; Test that LXR does not get converted to LTXBR as %f4 is live after it.
 define void @f14(ptr %ptr1, ptr %ptr2) #0 {
 ; CHECK-LABEL: f14:
-; CHECK: ltxbr
+; CHECK: lxr
 ; CHECK-NEXT: dxbr
 ; CHECK-NEXT: std
 ; CHECK-NEXT: std
 ; CHECK-NEXT: mxbr
 ; CHECK-NEXT: std
 ; CHECK-NEXT: std
+; CHECK-NEXT: ltxbr
 ; CHECK-NEXT: blr %r14
 ; CHECK: br %r14
 entry:
@@ -382,13 +385,14 @@ exit:
 }
 
 ; Test a case where it is the source rather than destination of LER that
-; we need.
+; we need, but cannot convert the LER.
 define float @f15(float %val, float %dummy) #0 {
 ; CHECK-LABEL: f15:
-; CHECK: ltebr %f2, %f0
+; CHECK: ler %f2, %f0
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: blah %f2
 ; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: ltebr %f1, %f2
 ; CHECK-NEXT: blr %r14
 ; CHECK: br %r14
 entry:
@@ -408,13 +412,14 @@ exit:
 }
 
 ; Test a case where it is the source rather than destination of LDR that
-; we need.
+; we need, but cannot convert the LDR.
 define double @f16(double %val, double %dummy) #0 {
 ; CHECK-LABEL: f16:
-; CHECK: ltdbr %f2, %f0
+; CHECK: ldr %f2, %f0
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: blah %f2
 ; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: ltdbr %f1, %f2
 ; CHECK-NEXT: blr %r14
 ; CHECK: br %r14
 entry:
@@ -463,7 +468,7 @@ exit:
 define float @f18(float %a, float %b, ptr %dest) #0 {
 ; CHECK-LABEL: f18:
 ; CHECK: aebr %f0, %f2
-; CHECK: ltebr %f0, %f0
+; CHECK: ltebr %f1, %f0
 ; CHECK-NEXT: ber %r14
 ; CHECK: br %r14
 entry:
@@ -494,7 +499,7 @@ define float @f19(float %dummy, float %val) #0 {
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: blah %f0
 ; CHECK-NEXT: #NO_APP
-; CHECK-NEXT: ltebr %f2, %f2
+; CHECK-NEXT: ltebr %f1, %f2
 ; CHECK-NEXT: blr %r14
 ; CHECK: br %r14
 entry:
diff --git a/llvm/test/CodeGen/SystemZ/tdc-06.ll b/llvm/test/CodeGen/SystemZ/tdc-06.ll
index 83ddb6e943a3b3..4ebf020c973daf 100644
--- a/llvm/test/CodeGen/SystemZ/tdc-06.ll
+++ b/llvm/test/CodeGen/SystemZ/tdc-06.ll
@@ -11,7 +11,7 @@ define i32 @fpc(double %x) {
 entry:
 ; CHECK-LABEL: fpc
 ; CHECK-DAG: lhi %r2, 5
-; CHECK-DAG: ltdbr %f0, %f0
+; CHECK-DAG: ltdbr %f1, %f0
 ; CHECK: je [[RET:.L.*]]
   %testeq = fcmp oeq double %x, 0.000000e+00
   br i1 %testeq, label %ret, label %nonzero, !prof !1
diff --git a/llvm/test/CodeGen/SystemZ/vec-max-05.ll b/llvm/test/CodeGen/SystemZ/vec-max-05.ll
index 9391b94f09d170..7bdf4e06029d2a 100644
--- a/llvm/test/CodeGen/SystemZ/vec-max-05.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-max-05.ll
@@ -59,7 +59,8 @@ define double @f4(double %dummy, double %val) {
 ; Test a f64 constant compare/select resulting in maximum.
 define double @f5(double %dummy, double %val) {
 ; CHECK-LABEL: f5:
-; CHECK: ltdbr	%f0, %f2
+; CHECK: ltdbr	%f1, %f2
+; CHECK-NEXT: ldr %f0, %f2
 ; CHECK: br %r14
   %cmp = fcmp ugt double %val, 0.0
   %ret = select i1 %cmp, double %val, double 0.0
diff --git a/llvm/test/CodeGen/SystemZ/vec-min-05.ll b/llvm/test/CodeGen/SystemZ/vec-min-05.ll
index dae80ec0a46ee2..bf27eb3e56036c 100644
--- a/llvm/test/CodeGen/SystemZ/vec-min-05.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-min-05.ll
@@ -59,7 +59,8 @@ define double @f4(double %dummy, double %val) {
 ; Test a f64 constant compare/select resulting in minimum.
 define double @f5(double %dummy, double %val) {
 ; CHECK-LABEL: f5:
-;	CHECK: ltdbr	%f0, %f2
+;	CHECK: ltdbr	%f1, %f2
+;  CHECK-NEXT: ldr	%f0, %f2
 ;	CHECK: bnher	%r14
   %cmp = fcmp ult double %val, 0.0
   %ret = select i1 %cmp, double %val, double 0.0

>From 7d22ecfd4e3410a29df4213472a8350c5d119ee3 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Sat, 13 Jan 2024 17:04:10 -0600
Subject: [PATCH 2/2] Remove Compare pseudos

---
 llvm/lib/Target/SystemZ/SystemZElimCompare.cpp | 14 +++-----------
 .../lib/Target/SystemZ/SystemZISelLowering.cpp | 15 ++++-----------
 llvm/lib/Target/SystemZ/SystemZISelLowering.h  |  3 +--
 llvm/lib/Target/SystemZ/SystemZInstrFP.td      | 18 ++++++++----------
 llvm/lib/Target/SystemZ/SystemZInstrFormats.td | 13 -------------
 llvm/lib/Target/SystemZ/SystemZPatterns.td     |  7 +++----
 llvm/lib/Target/SystemZ/SystemZScheduleZ13.td  |  4 +---
 llvm/lib/Target/SystemZ/SystemZScheduleZ14.td  |  4 +---
 llvm/lib/Target/SystemZ/SystemZScheduleZ15.td  |  4 +---
 llvm/lib/Target/SystemZ/SystemZScheduleZ16.td  |  4 +---
 llvm/lib/Target/SystemZ/SystemZScheduleZ196.td |  3 +--
 .../lib/Target/SystemZ/SystemZScheduleZEC12.td |  3 +--
 12 files changed, 25 insertions(+), 67 deletions(-)

diff --git a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
index 0991400af1c276..38e65cd2ea2123 100644
--- a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -492,18 +492,10 @@ bool SystemZElimCompare::adjustCCMasksForInstr(
 
 // Return true if Compare is a comparison against zero.
 static bool isCompareZero(MachineInstr &Compare) {
-  switch (Compare.getOpcode()) {
-  case SystemZ::LTEBRCompare:
-  case SystemZ::LTDBRCompare:
-  case SystemZ::LTXBRCompare:
+  if (isLoadAndTestAsCmp(Compare))
     return true;
-
-  default:
-    if (isLoadAndTestAsCmp(Compare))
-      return true;
-    return Compare.getNumExplicitOperands() == 2 &&
-           Compare.getOperand(1).isImm() && Compare.getOperand(1).getImm() == 0;
-  }
+  return Compare.getNumExplicitOperands() == 2 &&
+    Compare.getOperand(1).isImm() && Compare.getOperand(1).getImm() == 0;
 }
 
 // Try to optimize cases where comparison instruction Compare is testing
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 1e1dd65de25967..8407edd04372f3 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -9175,8 +9175,7 @@ MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
 }
 
 MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
-    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
-    unsigned CmpOpcode) const {
+    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
   MachineFunction &MF = *MBB->getParent();
   MachineRegisterInfo *MRI = &MF.getRegInfo();
   const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
@@ -9186,12 +9185,6 @@ MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
   // This instruction will raise an exception if the input is a SNaN.
   //  MI.clearFlag(MachineInstr::MIFlag::NoFPExcept);
 
-  // XXX Worthwile?  Regallochints for dead def instead?
-  if (!Subtarget.hasVector() && MRI->hasOneNonDBGUser(SrcReg)) {
-    MI.setDesc(TII->get(CmpOpcode));
-    return MBB;
-  }
-
   // Create new virtual register of the same class as source.
   const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
   Register DstReg = MRI->createVirtualRegister(RC);
@@ -9441,11 +9434,11 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
   case SystemZ::TBEGINC:
     return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
   case SystemZ::LTEBRCompare_Pseudo:
-    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR, SystemZ::LTEBRCompare);
+    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
   case SystemZ::LTDBRCompare_Pseudo:
-    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR, SystemZ::LTDBRCompare);
+    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
   case SystemZ::LTXBRCompare_Pseudo:
-    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR, SystemZ::LTXBRCompare);
+    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
 
   case SystemZ::PROBED_ALLOCA:
     return emitProbedAlloca(MI, MBB);
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 5067de5a7c74d5..baf4ba41654879 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -785,8 +785,7 @@ class SystemZTargetLowering : public TargetLowering {
                                           unsigned Opcode, bool NoFloat) const;
   MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr &MI,
                                          MachineBasicBlock *MBB,
-                                         unsigned Opcode,
-                                         unsigned CmpOpcode) const;
+                                         unsigned Opcode) const;
   MachineBasicBlock *emitProbedAlloca(MachineInstr &MI,
                                       MachineBasicBlock *MBB) const;
 
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index 74161c0db533e1..6e67425c1e788b 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -55,20 +55,18 @@ let isCodeGenOnly = 1 in
 // not be used for comparison if the result will be used afterwards.
 let Uses = [FPC], mayRaiseFPException = 1,
     Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
-  defm LTEBR : LoadAndTestRRE<"ltebr", 0xB302, FP32>;
-  defm LTDBR : LoadAndTestRRE<"ltdbr", 0xB312, FP64>;
-  defm LTXBR : LoadAndTestRRE<"ltxbr", 0xB342, FP128>;
+  def LTEBR : UnaryRRE<"ltebr", 0xB302, null_frag, FP32, FP32>;
+  def LTDBR : UnaryRRE<"ltdbr", 0xB312, null_frag, FP64, FP64>;
+  def LTXBR : UnaryRRE<"ltxbr", 0xB342, null_frag, FP128, FP128>;
 }
 
-// Use a normal load-and-test for compare against zero in case of vector
-// support (via a pseudo to simplify instruction selection). For a non-vector
-// subtarget these could be expanded to the "Compare" version which does not
-// clobber an extra phys-reg, but only if the result is not live. XXX Compare worthwhile?
+// Use a load-and-test for compare against zero (via a pseudo to simplify
+// instruction selection).
 let Uses = [FPC], mayRaiseFPException = 1,
     Defs = [CC], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
-  def LTEBRCompare_Pseudo : Pseudo<(outs), (ins FP32:$R1, FP32:$R2), []>;
-  def LTDBRCompare_Pseudo : Pseudo<(outs), (ins FP64:$R1, FP64:$R2), []>;
-  def LTXBRCompare_Pseudo : Pseudo<(outs), (ins FP128:$R1, FP128:$R2), []>;
+  def LTEBRCompare_Pseudo : Pseudo<(outs), (ins FP32:$R1), []>;
+  def LTDBRCompare_Pseudo : Pseudo<(outs), (ins FP64:$R1), []>;
+  def LTXBRCompare_Pseudo : Pseudo<(outs), (ins FP128:$R1), []>;
 }
 defm : CompareZeroFP<LTEBRCompare_Pseudo, FP32>;
 defm : CompareZeroFP<LTDBRCompare_Pseudo, FP64>;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
index 50b5ce5b83c391..bb9fa0fc33ffa0 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -5036,19 +5036,6 @@ class BranchPreloadMII<string mnemonic, bits<8> opcode>
             (ins imm32zx4:$M1, brtarget12bpp:$RI2, brtarget24bpp:$RI3),
             mnemonic#"\t$M1, $RI2, $RI3", []>;
 
-// A floating-point load-and test operation.  Create both a normal unary
-// operation and one that acts as a comparison against zero.
-// Note that the comparison against zero operation is not available if we
-// have vector support, since load-and-test instructions will partially
-// clobber the target (vector) register. It should also not be used if the
-// destination is used as it may actually be clobbered (if source is a SNaN).
-multiclass LoadAndTestRRE<string mnemonic, bits<16> opcode,
-                          RegisterOperand cls> {
-  def "" : UnaryRRE<mnemonic, opcode, null_frag, cls, cls>;
-  let isCodeGenOnly = 1, Predicates = [FeatureNoVector] in
-    def Compare : CompareRRE<mnemonic, opcode, null_frag, cls, cls>;
-}
-
 //===----------------------------------------------------------------------===//
 // Pseudo instructions
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/SystemZ/SystemZPatterns.td b/llvm/lib/Target/SystemZ/SystemZPatterns.td
index e3190eddb9f192..5e5dca77e9553b 100644
--- a/llvm/lib/Target/SystemZ/SystemZPatterns.td
+++ b/llvm/lib/Target/SystemZ/SystemZPatterns.td
@@ -145,12 +145,11 @@ multiclass BlockLoadStore<SDPatternOperator load, ValueType vt,
 }
 
 // Record that INSN is a LOAD AND TEST that can be used to compare
-// registers in CLS against zero.  The instruction has separate R1 and R2
-// operands, but they must be the same when the instruction is used like this.
+// registers in CLS against zero.
 multiclass CompareZeroFP<Instruction insn, RegisterOperand cls> {
-  def : Pat<(z_any_fcmp cls:$reg, (fpimm0)), (insn cls:$reg, cls:$reg)>;
+  def : Pat<(z_any_fcmp cls:$reg, (fpimm0)), (insn cls:$reg)>;
   // The sign of the zero makes no difference.
-  def : Pat<(z_any_fcmp cls:$reg, (fpimmneg0)), (insn cls:$reg, cls:$reg)>;
+  def : Pat<(z_any_fcmp cls:$reg, (fpimmneg0)), (insn cls:$reg)>;
 }
 
 // Use INSN for performing binary operation OPERATION of type VT
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
index fe84ca45374bd3..9ce1a0d06b5afd 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
@@ -784,9 +784,7 @@ def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>;
 
 // Load and Test
 def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>;
-def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BRCompare$")>;
-def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone],
-             (instregex "LTXBR(Compare)?$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXBR$")>;
 
 // Copy sign
 def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>;
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
index d5eab33dbb4a6e..7e6302ae656743 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
@@ -804,9 +804,7 @@ def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>;
 
 // Load and Test
 def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>;
-def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BRCompare$")>;
-def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone],
-             (instregex "LTXBR(Compare)?$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXBR$")>;
 
 // Copy sign
 def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>;
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td
index 09b505fc149e00..89edcf426bd714 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td
@@ -821,9 +821,7 @@ def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>;
 
 // Load and Test
 def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>;
-def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BRCompare$")>;
-def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone],
-             (instregex "LTXBR(Compare)?$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXBR$")>;
 
 // Copy sign
 def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>;
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td
index 695604af1b8872..8f6dc3befc1976 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td
@@ -822,9 +822,7 @@ def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>;
 
 // Load and Test
 def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>;
-def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BRCompare$")>;
-def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone],
-             (instregex "LTXBR(Compare)?$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXBR$")>;
 
 // Copy sign
 def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>;
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
index 98ea7d09490699..226db9d4272f9b 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
@@ -716,8 +716,7 @@ def : InstRW<[WLat2, FXU2, GroupAlone2], (instregex "LXR$")>;
 
 // Load and Test
 def : InstRW<[WLat9, WLat9, FPU, NormalGr], (instregex "LT(E|D)BR$")>;
-def : InstRW<[WLat9, FPU, NormalGr], (instregex "LT(E|D)BRCompare$")>;
-def : InstRW<[WLat10, WLat10, FPU4, GroupAlone], (instregex "LTXBR(Compare)?$")>;
+def : InstRW<[WLat10, WLat10, FPU4, GroupAlone], (instregex "LTXBR$")>;
 
 // Copy sign
 def : InstRW<[WLat5, FXU2, GroupAlone], (instregex "CPSDR(d|s)(d|s)$")>;
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
index bd0bc480a00810..f5ecdb1f438009 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
@@ -754,8 +754,7 @@ def : InstRW<[WLat2, FXU2, GroupAlone], (instregex "LXR$")>;
 
 // Load and Test
 def : InstRW<[WLat9, WLat9, FPU, NormalGr], (instregex "LT(E|D)BR$")>;
-def : InstRW<[WLat9, FPU, NormalGr], (instregex "LT(E|D)BRCompare$")>;
-def : InstRW<[WLat10, WLat10, FPU4, GroupAlone], (instregex "LTXBR(Compare)?$")>;
+def : InstRW<[WLat10, WLat10, FPU4, GroupAlone], (instregex "LTXBR$")>;
 
 // Copy sign
 def : InstRW<[WLat5, FXU2, GroupAlone], (instregex "CPSDR(d|s)(d|s)$")>;