[llvm] 1d18930 - [SystemZ] Don't use FP Load and Test as comparisons to same reg (#78074)

Mon Jan 15 10:36:45 PST 2024

Author: Jonas Paulsson
Date: 2024-01-15T19:36:40+01:00
New Revision: 1d1893097a6319a6402331a54a588b1a5d961808

URL: https://github.com/llvm/llvm-project/commit/1d1893097a6319a6402331a54a588b1a5d961808
DIFF: https://github.com/llvm/llvm-project/commit/1d1893097a6319a6402331a54a588b1a5d961808.diff

LOG: [SystemZ] Don't use FP Load and Test as comparisons to same reg (#78074)

Using the FP Load and Test instructions as a comparison against zero, on the
assumption that the destination register always ends up holding the source
value, is incorrect: an SNaN is converted to a QNaN, so the instruction may
change the value rather than acting as a pure register move with a test.

This patch:
- Changes instruction selection to always emit FP Load and Test with a
  scratch def reg, which will typically be allocated to the same reg as the
  source if the source is dead after the comparison.
- Removes the conversions into FP Load and Test in SystemZElimCompare.

Added: 
    llvm/test/CodeGen/SystemZ/fp-cmp-zero.ll

Modified: 
    llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
    llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
    llvm/lib/Target/SystemZ/SystemZInstrFP.td
    llvm/lib/Target/SystemZ/SystemZInstrFormats.td
    llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
    llvm/lib/Target/SystemZ/SystemZPatterns.td
    llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
    llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
    llvm/lib/Target/SystemZ/SystemZScheduleZ15.td
    llvm/lib/Target/SystemZ/SystemZScheduleZ16.td
    llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
    llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
    llvm/test/CodeGen/SystemZ/fp-cmp-04.ll
    llvm/test/CodeGen/SystemZ/fp-strict-cmp-04.ll
    llvm/test/CodeGen/SystemZ/tdc-06.ll
    llvm/test/CodeGen/SystemZ/vec-max-05.ll
    llvm/test/CodeGen/SystemZ/vec-min-05.ll

Removed: 
    llvm/test/CodeGen/SystemZ/fp-cmp-07.mir


################################################################################
diff  --git a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
index 340dba1362aff1f..7423ed429ffb68d 100644

--- a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -115,12 +115,6 @@ static bool preservesValueOf(MachineInstr &MI, unsigned Reg) {
   case SystemZ::LTR:
   case SystemZ::LTGR:
   case SystemZ::LTGFR:
-  case SystemZ::LER:
-  case SystemZ::LDR:
-  case SystemZ::LXR:
-  case SystemZ::LTEBR:
-  case SystemZ::LTDBR:
-  case SystemZ::LTXBR:
     if (MI.getOperand(1).getReg() == Reg)
       return true;
   }
@@ -498,18 +492,10 @@ bool SystemZElimCompare::adjustCCMasksForInstr(
 
 // Return true if Compare is a comparison against zero.
 static bool isCompareZero(MachineInstr &Compare) {
-  switch (Compare.getOpcode()) {
-  case SystemZ::LTEBRCompare:
-  case SystemZ::LTDBRCompare:
-  case SystemZ::LTXBRCompare:
+  if (isLoadAndTestAsCmp(Compare))
     return true;
-
-  default:
-    if (isLoadAndTestAsCmp(Compare))
-      return true;
-    return Compare.getNumExplicitOperands() == 2 &&
-           Compare.getOperand(1).isImm() && Compare.getOperand(1).getImm() == 0;
-  }
+  return Compare.getNumExplicitOperands() == 2 &&
+    Compare.getOperand(1).isImm() && Compare.getOperand(1).getImm() == 0;
 }
 
 // Try to optimize cases where comparison instruction Compare is testing
@@ -569,7 +555,7 @@ bool SystemZElimCompare::optimizeCompareZero(
 
   // Also do a forward search to handle cases where an instruction after the
   // compare can be converted, like
-  // LTEBRCompare %f0s, %f0s; %f2s = LER %f0s  =>  LTEBRCompare %f2s, %f0s
+  // CGHI %r0d, 0; %r1d = LGR %r0d  =>  LTGR %r1d, %r0d
   auto MIRange = llvm::make_range(
       std::next(MachineBasicBlock::iterator(&Compare)), MBB.end());
   for (MachineInstr &MI : llvm::make_early_inc_range(MIRange)) {

diff  --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 896091d3d4475b4..924df12578fe4bb 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -9437,11 +9437,11 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
     return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
   case SystemZ::TBEGINC:
     return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
-  case SystemZ::LTEBRCompare_VecPseudo:
+  case SystemZ::LTEBRCompare_Pseudo:
     return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
-  case SystemZ::LTDBRCompare_VecPseudo:
+  case SystemZ::LTDBRCompare_Pseudo:
     return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
-  case SystemZ::LTXBRCompare_VecPseudo:
+  case SystemZ::LTXBRCompare_Pseudo:
     return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
 
   case SystemZ::PROBED_ALLOCA:

diff  --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index ea62e99a58399c9..6e67425c1e788b4 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -51,36 +51,27 @@ let isCodeGenOnly = 1 in
   def LDR32 : UnaryRR<"ldr", 0x28, null_frag, FP32, FP32>;
 
 // Moves between two floating-point registers that also set the condition
-// codes.
+// codes. Note that these instructions will turn SNaNs into QNaNs and should
+// not be used for comparison if the result will be used afterwards.
 let Uses = [FPC], mayRaiseFPException = 1,
     Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
-  defm LTEBR : LoadAndTestRRE<"ltebr", 0xB302, FP32>;
-  defm LTDBR : LoadAndTestRRE<"ltdbr", 0xB312, FP64>;
-  defm LTXBR : LoadAndTestRRE<"ltxbr", 0xB342, FP128>;
-}
-// Note that LTxBRCompare is not available if we have vector support,
-// since load-and-test instructions will partially clobber the target
-// (vector) register.
-let Predicates = [FeatureNoVector] in {
-  defm : CompareZeroFP<LTEBRCompare, FP32>;
-  defm : CompareZeroFP<LTDBRCompare, FP64>;
-  defm : CompareZeroFP<LTXBRCompare, FP128>;
+  def LTEBR : UnaryRRE<"ltebr", 0xB302, null_frag, FP32, FP32>;
+  def LTDBR : UnaryRRE<"ltdbr", 0xB312, null_frag, FP64, FP64>;
+  def LTXBR : UnaryRRE<"ltxbr", 0xB342, null_frag, FP128, FP128>;
 }
 
-// Use a normal load-and-test for compare against zero in case of
-// vector support (via a pseudo to simplify instruction selection).
+// Use a load-and-test for compare against zero (via a pseudo to simplify
+// instruction selection).
 let Uses = [FPC], mayRaiseFPException = 1,
     Defs = [CC], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
-  def LTEBRCompare_VecPseudo : Pseudo<(outs), (ins FP32:$R1, FP32:$R2), []>;
-  def LTDBRCompare_VecPseudo : Pseudo<(outs), (ins FP64:$R1, FP64:$R2), []>;
-  def LTXBRCompare_VecPseudo : Pseudo<(outs), (ins FP128:$R1, FP128:$R2), []>;
-}
-let Predicates = [FeatureVector] in {
-  defm : CompareZeroFP<LTEBRCompare_VecPseudo, FP32>;
-  defm : CompareZeroFP<LTDBRCompare_VecPseudo, FP64>;
+  def LTEBRCompare_Pseudo : Pseudo<(outs), (ins FP32:$R1), []>;
+  def LTDBRCompare_Pseudo : Pseudo<(outs), (ins FP64:$R1), []>;
+  def LTXBRCompare_Pseudo : Pseudo<(outs), (ins FP128:$R1), []>;
 }
-let Predicates = [FeatureVector, FeatureNoVectorEnhancements1] in
-  defm : CompareZeroFP<LTXBRCompare_VecPseudo, FP128>;
+defm : CompareZeroFP<LTEBRCompare_Pseudo, FP32>;
+defm : CompareZeroFP<LTDBRCompare_Pseudo, FP64>;
+let Predicates = [FeatureNoVectorEnhancements1] in
+  defm : CompareZeroFP<LTXBRCompare_Pseudo, FP128>;
 
 // Moves between 64-bit integer and floating-point registers.
 def LGDR : UnaryRRE<"lgdr", 0xB3CD, bitconvert, GR64, FP64>;

diff  --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
index 2e5ff4a1df673b8..bb9fa0fc33ffa0e 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -5036,18 +5036,6 @@ class BranchPreloadMII<string mnemonic, bits<8> opcode>
             (ins imm32zx4:$M1, brtarget12bpp:$RI2, brtarget24bpp:$RI3),
             mnemonic#"\t$M1, $RI2, $RI3", []>;
 
-// A floating-point load-and test operation.  Create both a normal unary
-// operation and one that acts as a comparison against zero.
-// Note that the comparison against zero operation is not available if we
-// have vector support, since load-and-test instructions will partially
-// clobber the target (vector) register.
-multiclass LoadAndTestRRE<string mnemonic, bits<16> opcode,
-                          RegisterOperand cls> {
-  def "" : UnaryRRE<mnemonic, opcode, null_frag, cls, cls>;
-  let isCodeGenOnly = 1, Predicates = [FeatureNoVector] in
-    def Compare : CompareRRE<mnemonic, opcode, null_frag, cls, cls>;
-}
-
 //===----------------------------------------------------------------------===//
 // Pseudo instructions
 //===----------------------------------------------------------------------===//

diff  --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index ac8c395f9064fb8..bf6547cc87ec5e3 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -1692,9 +1692,6 @@ unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const {
   case SystemZ::LR:     return SystemZ::LTR;
   case SystemZ::LGFR:   return SystemZ::LTGFR;
   case SystemZ::LGR:    return SystemZ::LTGR;
-  case SystemZ::LER:    return SystemZ::LTEBR;
-  case SystemZ::LDR:    return SystemZ::LTDBR;
-  case SystemZ::LXR:    return SystemZ::LTXBR;
   case SystemZ::LCDFR:  return SystemZ::LCDBR;
   case SystemZ::LPDFR:  return SystemZ::LPDBR;
   case SystemZ::LNDFR:  return SystemZ::LNDBR;

diff  --git a/llvm/lib/Target/SystemZ/SystemZPatterns.td b/llvm/lib/Target/SystemZ/SystemZPatterns.td
index e3190eddb9f1923..5e5dca77e9553b4 100644
--- a/llvm/lib/Target/SystemZ/SystemZPatterns.td
+++ b/llvm/lib/Target/SystemZ/SystemZPatterns.td
@@ -145,12 +145,11 @@ multiclass BlockLoadStore<SDPatternOperator load, ValueType vt,
 }
 
 // Record that INSN is a LOAD AND TEST that can be used to compare
-// registers in CLS against zero.  The instruction has separate R1 and R2
-// operands, but they must be the same when the instruction is used like this.
+// registers in CLS against zero.
 multiclass CompareZeroFP<Instruction insn, RegisterOperand cls> {
-  def : Pat<(z_any_fcmp cls:$reg, (fpimm0)), (insn cls:$reg, cls:$reg)>;
+  def : Pat<(z_any_fcmp cls:$reg, (fpimm0)), (insn cls:$reg)>;
   // The sign of the zero makes no difference.
-  def : Pat<(z_any_fcmp cls:$reg, (fpimmneg0)), (insn cls:$reg, cls:$reg)>;
+  def : Pat<(z_any_fcmp cls:$reg, (fpimmneg0)), (insn cls:$reg)>;
 }
 
 // Use INSN for performing binary operation OPERATION of type VT

diff  --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
index fe84ca45374bd35..9ce1a0d06b5afd9 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
@@ -784,9 +784,7 @@ def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>;
 
 // Load and Test
 def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>;
-def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BRCompare$")>;
-def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone],
-             (instregex "LTXBR(Compare)?$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXBR$")>;
 
 // Copy sign
 def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>;

diff  --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
index d5eab33dbb4a6ed..7e6302ae656743d 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
@@ -804,9 +804,7 @@ def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>;
 
 // Load and Test
 def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>;
-def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BRCompare$")>;
-def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone],
-             (instregex "LTXBR(Compare)?$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXBR$")>;
 
 // Copy sign
 def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>;

diff  --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td
index 09b505fc149e00f..89edcf426bd7147 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td
@@ -821,9 +821,7 @@ def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>;
 
 // Load and Test
 def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>;
-def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BRCompare$")>;
-def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone],
-             (instregex "LTXBR(Compare)?$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXBR$")>;
 
 // Copy sign
 def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>;

diff  --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td
index 695604af1b88726..8f6dc3befc1976b 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td
@@ -822,9 +822,7 @@ def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>;
 
 // Load and Test
 def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>;
-def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BRCompare$")>;
-def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone],
-             (instregex "LTXBR(Compare)?$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXBR$")>;
 
 // Copy sign
 def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>;

diff  --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
index 98ea7d094906998..226db9d4272f9bd 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
@@ -716,8 +716,7 @@ def : InstRW<[WLat2, FXU2, GroupAlone2], (instregex "LXR$")>;
 
 // Load and Test
 def : InstRW<[WLat9, WLat9, FPU, NormalGr], (instregex "LT(E|D)BR$")>;
-def : InstRW<[WLat9, FPU, NormalGr], (instregex "LT(E|D)BRCompare$")>;
-def : InstRW<[WLat10, WLat10, FPU4, GroupAlone], (instregex "LTXBR(Compare)?$")>;
+def : InstRW<[WLat10, WLat10, FPU4, GroupAlone], (instregex "LTXBR$")>;
 
 // Copy sign
 def : InstRW<[WLat5, FXU2, GroupAlone], (instregex "CPSDR(d|s)(d|s)$")>;

diff  --git a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
index bd0bc480a008101..f5ecdb1f4380093 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
@@ -754,8 +754,7 @@ def : InstRW<[WLat2, FXU2, GroupAlone], (instregex "LXR$")>;
 
 // Load and Test
 def : InstRW<[WLat9, WLat9, FPU, NormalGr], (instregex "LT(E|D)BR$")>;
-def : InstRW<[WLat9, FPU, NormalGr], (instregex "LT(E|D)BRCompare$")>;
-def : InstRW<[WLat10, WLat10, FPU4, GroupAlone], (instregex "LTXBR(Compare)?$")>;
+def : InstRW<[WLat10, WLat10, FPU4, GroupAlone], (instregex "LTXBR$")>;
 
 // Copy sign
 def : InstRW<[WLat5, FXU2, GroupAlone], (instregex "CPSDR(d|s)(d|s)$")>;

diff  --git a/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll b/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll
index f93b27f829f844a..c1773abe92305d2 100644
--- a/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll
@@ -163,7 +163,7 @@ exit:
 define float @f9(float %a, float %b, ptr %dest) {
 ; CHECK-LABEL: f9:
 ; CHECK: meebr %f0, %f2
-; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: ltebr %f1, %f0
 ; CHECK-NEXT: blhr %r14
 ; CHECK: br %r14
 entry:
@@ -185,7 +185,7 @@ define float @f10(float %a, float %b, float %c, ptr %dest) {
 ; CHECK-LABEL: f10:
 ; CHECK: aebr %f0, %f2
 ; CHECK-NEXT: debr %f0, %f4
-; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: ltebr %f1, %f0
 ; CHECK-NEXT: bner %r14
 ; CHECK: br %r14
 entry:
@@ -209,7 +209,7 @@ define float @f11(float %a, float %b, float %c, ptr %dest1, ptr %dest2) {
 ; CHECK: aebr %f0, %f2
 ; CHECK-NEXT: sebr %f4, %f0
 ; CHECK-DAG: ste %f4, 0(%r2)
-; CHECK-DAG: ltebr %f0, %f0
+; CHECK-DAG: ltebr %f1, %f0
 ; CHECK-NEXT: ber %r14
 ; CHECK: br %r14
 entry:
@@ -227,10 +227,11 @@ exit:
   ret float %add
 }
 
-; Test that LER gets converted to LTEBR where useful.
+; %val in %f2 must be preserved during comparison and also copied to %f0.
 define float @f12(float %dummy, float %val, ptr %dest) {
 ; CHECK-LABEL: f12:
-; CHECK: ltebr %f0, %f2
+; CHECK: ler %f0, %f2
+; CHECK-NEXT: ltebr %f1, %f2
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: blah %f0
 ; CHECK-NEXT: #NO_APP
@@ -249,10 +250,11 @@ exit:
   ret float %val
 }
 
-; Test that LDR gets converted to LTDBR where useful.
+; Same for double.
 define double @f13(double %dummy, double %val, ptr %dest) {
 ; CHECK-LABEL: f13:
-; CHECK: ltdbr %f0, %f2
+; CHECK: ldr %f0, %f2
+; CHECK-NEXT: ltdbr %f1, %f2
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: blah %f0
 ; CHECK-NEXT: #NO_APP
@@ -271,14 +273,15 @@ exit:
   ret double %val
 }
 
-; Test that LXR gets converted to LTXBR where useful.
+; LXR cannot be converted to LTXBR as its input is live after it.
 define void @f14(ptr %ptr1, ptr %ptr2) {
 ; CHECK-LABEL: f14:
-; CHECK: ltxbr
+; CHECK: lxr
 ; CHECK-NEXT: dxbr
 ; CHECK-NEXT: std
 ; CHECK-NEXT: std
 ; CHECK-NEXT: mxbr
+; CHECK-NEXT: ltxbr
 ; CHECK-NEXT: std
 ; CHECK-NEXT: std
 ; CHECK-NEXT: blr %r14
@@ -301,11 +304,10 @@ exit:
   ret void
 }
 
-; Test a case where it is the source rather than destination of LER that
-; we need.
 define float @f15(float %val, float %dummy, ptr %dest) {
 ; CHECK-LABEL: f15:
-; CHECK: ltebr %f2, %f0
+; CHECK: ltebr %f1, %f0
+; CHECK-NEXT: ler %f2, %f0
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: blah %f2
 ; CHECK-NEXT: #NO_APP
@@ -324,11 +326,10 @@ exit:
   ret float %val
 }
 
-; Test a case where it is the source rather than destination of LDR that
-; we need.
 define double @f16(double %val, double %dummy, ptr %dest) {
 ; CHECK-LABEL: f16:
-; CHECK: ltdbr %f2, %f0
+; CHECK: ltdbr %f1, %f0
+; CHECK: ldr %f2, %f0
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: blah %f2
 ; CHECK-NEXT: #NO_APP

diff  --git a/llvm/test/CodeGen/SystemZ/fp-cmp-07.mir b/llvm/test/CodeGen/SystemZ/fp-cmp-07.mir
deleted file mode 100644
index 63b9a3a4c9f0228..000000000000000
--- a/llvm/test/CodeGen/SystemZ/fp-cmp-07.mir
+++ /dev/null
@@ -1,44 +0,0 @@
-# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z10 -no-integrated-as -start-after=block-placement %s -o - | FileCheck %s
-# Test that LTEBR is used without an unnecessary LER
-
---- |
-  define float @f15(float %val, float %dummy, float* %dest) {
-  entry:
-    call void asm sideeffect "blah $0", "{f2}"(float %val)
-    %cmp = fcmp olt float %val, 0.000000e+00
-    br i1 %cmp, label %exit, label %store
-
-  store:                                            ; preds = %entry
-    store float %val, float* %dest
-    br label %exit
-
-  exit:                                             ; preds = %store, %entry
-    ret float %val
-  }
-
-...
-
-# CHECK: ltebr %f2, %f0
-
----
-name:            f15
-tracksRegLiveness: true
-liveins:
-  - { reg: '$f0s', virtual-reg: '' }
-  - { reg: '$r2d', virtual-reg: '' }
-body:             |
-  bb.0.entry:
-    liveins: $f0s, $r2d
-
-    LTEBRCompare $f0s, $f0s, implicit-def $cc, implicit $fpc
-    $f2s = LER $f0s
-    INLINEASM &"blah $0", 1, 9, $f2s
-    CondReturn 15, 4, implicit $f0s, implicit $cc
-
-  bb.1.store:
-    liveins: $f0s, $r2d
-
-    STE $f0s, killed $r2d, 0, $noreg :: (store (s32) into %ir.dest)
-    Return implicit $f0s
-
-...

diff  --git a/llvm/test/CodeGen/SystemZ/fp-cmp-zero.ll b/llvm/test/CodeGen/SystemZ/fp-cmp-zero.ll
new file mode 100644
index 000000000000000..01318f3cf119a86
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/fp-cmp-zero.ll
@@ -0,0 +1,94 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Check comparisons with zero. If the tested value is live after the comparison,
+; load and test must not use the source register as its destination.
+
+; Compared value is used afterwards.
+define i64 @f1(i64 %a, i64 %b, float %V, ptr %dst) {
+; CHECK-LABEL: f1:
+; CHECK: ltebr %f1, %f0
+  %cond = fcmp oeq float %V, 0.0
+  %res = select i1 %cond, i64 %a, i64 %b
+  store volatile float %V, ptr %dst
+  ret i64 %res
+}
+
+define i64 @f1m(i64 %a, i64 %b, float %V, ptr %dst) {
+; CHECK-LABEL: f1m:
+; CHECK: ltebr %f1, %f0
+  %cond = fcmp oeq float %V, -0.0
+  %res = select i1 %cond, i64 %a, i64 %b
+  store volatile float %V, ptr %dst
+  ret i64 %res
+}
+
+; Value only used in comparison.
+define i64 @f2(i64 %a, i64 %b, float %V) {
+; CHECK-LABEL: f2:
+; CHECK: ltebr %f0, %f0
+  %cond = fcmp oeq float %V, 0.0
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+define i64 @f2m(i64 %a, i64 %b, float %V) {
+; CHECK-LABEL: f2m:
+; CHECK: ltebr %f0, %f0
+  %cond = fcmp oeq float %V, -0.0
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; Same for double
+define i64 @f3(i64 %a, i64 %b, double %V, ptr %dst) {
+; CHECK-LABEL: f3:
+; CHECK: ltdbr %f1, %f0
+  %cond = fcmp oeq double %V, 0.0
+  %res = select i1 %cond, i64 %a, i64 %b
+  store volatile double %V, ptr %dst
+  ret i64 %res
+}
+
+define i64 @f3m(i64 %a, i64 %b, double %V, ptr %dst) {
+; CHECK-LABEL: f3m:
+; CHECK: ltdbr %f1, %f0
+  %cond = fcmp oeq double %V, -0.0
+  %res = select i1 %cond, i64 %a, i64 %b
+  store volatile double %V, ptr %dst
+  ret i64 %res
+}
+
+define i64 @f4(i64 %a, i64 %b, double %V) {
+; CHECK-LABEL: f4:
+; CHECK: ltdbr %f0, %f0
+  %cond = fcmp oeq double %V, 0.0
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+define i64 @f4m(i64 %a, i64 %b, double %V) {
+; CHECK-LABEL: f4m:
+; CHECK: ltdbr %f0, %f0
+  %cond = fcmp oeq double %V, -0.0
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; Same for fp128
+define i64 @f5(i64 %a, i64 %b, fp128 %V, ptr %dst) {
+; CHECK-LABEL: f5:
+; CHECK: ltxbr %f1, %f0
+  %cond = fcmp oeq fp128 %V, 0xL00000000000000008000000000000000
+  %res = select i1 %cond, i64 %a, i64 %b
+  store volatile fp128 %V, ptr %dst
+  ret i64 %res
+}
+
+define i64 @f6(i64 %a, i64 %b, fp128 %V) {
+; CHECK-LABEL: f6:
+; CHECK: ltxbr %f0, %f0
+  %cond = fcmp oeq fp128 %V, 0xL00000000000000008000000000000000
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}

diff  --git a/llvm/test/CodeGen/SystemZ/fp-strict-cmp-04.ll b/llvm/test/CodeGen/SystemZ/fp-strict-cmp-04.ll
index bc6f3f469c3a291..bf9ccbcd70550e2 100644
--- a/llvm/test/CodeGen/SystemZ/fp-strict-cmp-04.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-cmp-04.ll
@@ -140,7 +140,7 @@ exit:
 define float @f6(float %dummy, float %a, ptr %dest) #0 {
 ; CHECK-LABEL: f6:
 ; CHECK: lpdfr %f0, %f2
-; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: ltebr %f1, %f0
 ; CHECK-NEXT: bhr %r14
 ; CHECK: br %r14
 entry:
@@ -163,7 +163,7 @@ exit:
 define float @f7(float %dummy, float %a, ptr %dest) #0 {
 ; CHECK-LABEL: f7:
 ; CHECK: lndfr %f0, %f2
-; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: ltebr %f1, %f0
 ; CHECK-NEXT: blr %r14
 ; CHECK: br %r14
 entry:
@@ -187,7 +187,7 @@ exit:
 define float @f8(float %dummy, float %a, ptr %dest) #0 {
 ; CHECK-LABEL: f8:
 ; CHECK: lcdfr %f0, %f2
-; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: ltebr %f1, %f0
 ; CHECK-NEXT: bler %r14
 ; CHECK: br %r14
 entry:
@@ -210,7 +210,7 @@ exit:
 define float @f9(float %a, float %b, ptr %dest) #0 {
 ; CHECK-LABEL: f9:
 ; CHECK: meebr %f0, %f2
-; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: ltebr %f1, %f0
 ; CHECK-NEXT: blhr %r14
 ; CHECK: br %r14
 entry:
@@ -238,7 +238,7 @@ define float @f10(float %a, float %b, float %c, ptr %dest) #0 {
 ; CHECK-LABEL: f10:
 ; CHECK: aebr %f0, %f2
 ; CHECK-NEXT: debr %f0, %f4
-; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: ltebr %f1, %f0
 ; CHECK-NEXT: bner %r14
 ; CHECK: br %r14
 entry:
@@ -271,7 +271,7 @@ define float @f11(float %a, float %b, float %c, ptr %dest1, ptr %dest2) #0 {
 ; CHECK: aebr %f0, %f2
 ; CHECK-NEXT: sebr %f4, %f0
 ; CHECK-DAG: ste %f4, 0(%r2)
-; CHECK-DAG: ltebr %f0, %f0
+; CHECK-DAG: ltebr %f1, %f0
 ; CHECK-NEXT: ber %r14
 ; CHECK: br %r14
 entry:
@@ -298,13 +298,14 @@ exit:
   ret float %add
 }
 
-; Test that LER gets converted to LTEBR where useful.
+; Test that LER does not get converted to LTEBR as %f0 is live after it.
 define float @f12(float %dummy, float %val) #0 {
 ; CHECK-LABEL: f12:
-; CHECK: ltebr %f0, %f2
+; CHECK: ler %f0, %f2
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: blah %f0
 ; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: ltebr %f1, %f2
 ; CHECK-NEXT: blr %r14
 ; CHECK: br %r14
 entry:
@@ -323,13 +324,14 @@ exit:
   ret float %ret
 }
 
-; Test that LDR gets converted to LTDBR where useful.
+; Test that LDR does not get converted to LTDBR as %f0 is live after it.
 define double @f13(double %dummy, double %val) #0 {
 ; CHECK-LABEL: f13:
-; CHECK: ltdbr %f0, %f2
+; CHECK: ldr %f0, %f2
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: blah %f0
 ; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: ltdbr %f1, %f2
 ; CHECK-NEXT: blr %r14
 ; CHECK: br %r14
 entry:
@@ -348,16 +350,17 @@ exit:
   ret double %ret
 }
 
-; Test that LXR gets converted to LTXBR where useful.
+; Test that LXR does not get converted to LTXBR as %f4 is live after it.
 define void @f14(ptr %ptr1, ptr %ptr2) #0 {
 ; CHECK-LABEL: f14:
-; CHECK: ltxbr
+; CHECK: lxr
 ; CHECK-NEXT: dxbr
 ; CHECK-NEXT: std
 ; CHECK-NEXT: std
 ; CHECK-NEXT: mxbr
 ; CHECK-NEXT: std
 ; CHECK-NEXT: std
+; CHECK-NEXT: ltxbr
 ; CHECK-NEXT: blr %r14
 ; CHECK: br %r14
 entry:
@@ -382,13 +385,14 @@ exit:
 }
 
 ; Test a case where it is the source rather than destination of LER that
-; we need.
+; we need, but cannot convert the LER.
 define float @f15(float %val, float %dummy) #0 {
 ; CHECK-LABEL: f15:
-; CHECK: ltebr %f2, %f0
+; CHECK: ler %f2, %f0
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: blah %f2
 ; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: ltebr %f1, %f2
 ; CHECK-NEXT: blr %r14
 ; CHECK: br %r14
 entry:
@@ -408,13 +412,14 @@ exit:
 }
 
 ; Test a case where it is the source rather than destination of LDR that
-; we need.
+; we need, but cannot convert the LDR.
 define double @f16(double %val, double %dummy) #0 {
 ; CHECK-LABEL: f16:
-; CHECK: ltdbr %f2, %f0
+; CHECK: ldr %f2, %f0
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: blah %f2
 ; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: ltdbr %f1, %f2
 ; CHECK-NEXT: blr %r14
 ; CHECK: br %r14
 entry:
@@ -463,7 +468,7 @@ exit:
 define float @f18(float %a, float %b, ptr %dest) #0 {
 ; CHECK-LABEL: f18:
 ; CHECK: aebr %f0, %f2
-; CHECK: ltebr %f0, %f0
+; CHECK: ltebr %f1, %f0
 ; CHECK-NEXT: ber %r14
 ; CHECK: br %r14
 entry:
@@ -494,7 +499,7 @@ define float @f19(float %dummy, float %val) #0 {
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: blah %f0
 ; CHECK-NEXT: #NO_APP
-; CHECK-NEXT: ltebr %f2, %f2
+; CHECK-NEXT: ltebr %f1, %f2
 ; CHECK-NEXT: blr %r14
 ; CHECK: br %r14
 entry:

diff  --git a/llvm/test/CodeGen/SystemZ/tdc-06.ll b/llvm/test/CodeGen/SystemZ/tdc-06.ll
index 83ddb6e943a3b34..4ebf020c973daf5 100644
--- a/llvm/test/CodeGen/SystemZ/tdc-06.ll
+++ b/llvm/test/CodeGen/SystemZ/tdc-06.ll
@@ -11,7 +11,7 @@ define i32 @fpc(double %x) {
 entry:
 ; CHECK-LABEL: fpc
 ; CHECK-DAG: lhi %r2, 5
-; CHECK-DAG: ltdbr %f0, %f0
+; CHECK-DAG: ltdbr %f1, %f0
 ; CHECK: je [[RET:.L.*]]
   %testeq = fcmp oeq double %x, 0.000000e+00
   br i1 %testeq, label %ret, label %nonzero, !prof !1

diff  --git a/llvm/test/CodeGen/SystemZ/vec-max-05.ll b/llvm/test/CodeGen/SystemZ/vec-max-05.ll
index 9391b94f09d170d..7bdf4e06029d2a7 100644
--- a/llvm/test/CodeGen/SystemZ/vec-max-05.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-max-05.ll
@@ -59,7 +59,8 @@ define double @f4(double %dummy, double %val) {
 ; Test a f64 constant compare/select resulting in maximum.
 define double @f5(double %dummy, double %val) {
 ; CHECK-LABEL: f5:
-; CHECK: ltdbr	%f0, %f2
+; CHECK: ltdbr	%f1, %f2
+; CHECK-NEXT: ldr %f0, %f2
 ; CHECK: br %r14
   %cmp = fcmp ugt double %val, 0.0
   %ret = select i1 %cmp, double %val, double 0.0

diff  --git a/llvm/test/CodeGen/SystemZ/vec-min-05.ll b/llvm/test/CodeGen/SystemZ/vec-min-05.ll
index dae80ec0a46ee26..bf27eb3e56036c8 100644
--- a/llvm/test/CodeGen/SystemZ/vec-min-05.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-min-05.ll
@@ -59,7 +59,8 @@ define double @f4(double %dummy, double %val) {
 ; Test a f64 constant compare/select resulting in minimum.
 define double @f5(double %dummy, double %val) {
 ; CHECK-LABEL: f5:
-;	CHECK: ltdbr	%f0, %f2
+;	CHECK: ltdbr	%f1, %f2
+;  CHECK-NEXT: ldr	%f0, %f2
 ;	CHECK: bnher	%r14
   %cmp = fcmp ult double %val, 0.0
   %ret = select i1 %cmp, double %val, double 0.0