[llvm] 62ff996 - [SystemZ] Improve foldMemoryOperandImpl().

Jonas Paulsson via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 10 07:56:10 PDT 2020


Author: Jonas Paulsson
Date: 2020-03-10T15:54:47+01:00
New Revision: 62ff9960d337cd24d633d7dd38b49baaee05cf26

URL: https://github.com/llvm/llvm-project/commit/62ff9960d337cd24d633d7dd38b49baaee05cf26
DIFF: https://github.com/llvm/llvm-project/commit/62ff9960d337cd24d633d7dd38b49baaee05cf26.diff

LOG: [SystemZ]  Improve foldMemoryOperandImpl().

Swap the compare operands if the LHS is spilled while updating the CCMasks of
the CC users. This is relatively straightforward since the live-in lists for
the CC register can be assumed to be correct during register allocation
(thanks to 659efa2).

Also fold a spilled operand of an LOCR/SELR into an LOC(G).

Review: Ulrich Weigand

Differential Revision: https://reviews.llvm.org/D67437

Added: 
    llvm/test/CodeGen/SystemZ/cond-move-10.ll
    llvm/test/CodeGen/SystemZ/int-cmp-56.mir

Modified: 
    llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
    llvm/lib/Target/SystemZ/SystemZInstrFormats.td
    llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
    llvm/lib/Target/SystemZ/SystemZInstrInfo.h
    llvm/lib/Target/SystemZ/SystemZInstrInfo.td

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 37f0041d5440..ddced5a42103 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -2190,15 +2190,6 @@ static bool shouldSwapCmpOperands(const Comparison &C) {
   return false;
 }
 
-// Return a version of comparison CC mask CCMask in which the LT and GT
-// actions are swapped.
-static unsigned reverseCCMask(unsigned CCMask) {
-  return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
-          (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
-          (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
-          (CCMask & SystemZ::CCMASK_CMP_UO));
-}
-
 // Check whether C tests for equality between X and Y and whether X - Y
 // or Y - X is also computed.  In that case it's better to compare the
 // result of the subtraction against zero.
@@ -2234,7 +2225,7 @@ static void adjustForFNeg(Comparison &C) {
       SDNode *N = *I;
       if (N->getOpcode() == ISD::FNEG) {
         C.Op0 = SDValue(N, 0);
-        C.CCMask = reverseCCMask(C.CCMask);
+        C.CCMask = SystemZ::reverseCCMask(C.CCMask);
         return;
       }
     }
@@ -2601,7 +2592,7 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
 
   if (shouldSwapCmpOperands(C)) {
     std::swap(C.Op0, C.Op1);
-    C.CCMask = reverseCCMask(C.CCMask);
+    C.CCMask = SystemZ::reverseCCMask(C.CCMask);
   }
 
   adjustForTestUnderMask(DAG, DL, C);
@@ -6277,15 +6268,7 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
       return false;
 
     // Compute the effective CC mask for the new branch or select.
-    switch (CCMask) {
-    case SystemZ::CCMASK_CMP_EQ: break;
-    case SystemZ::CCMASK_CMP_NE: break;
-    case SystemZ::CCMASK_CMP_LT: CCMask = SystemZ::CCMASK_CMP_GT; break;
-    case SystemZ::CCMASK_CMP_GT: CCMask = SystemZ::CCMASK_CMP_LT; break;
-    case SystemZ::CCMASK_CMP_LE: CCMask = SystemZ::CCMASK_CMP_GE; break;
-    case SystemZ::CCMASK_CMP_GE: CCMask = SystemZ::CCMASK_CMP_LE; break;
-    default: return false;
-    }
+    CCMask = SystemZ::reverseCCMask(CCMask);
 
     // Return the updated CCReg link.
     CCReg = IPM->getOperand(0);

diff  --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
index f064d33ac2f3..f0c117f6d1f5 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -2807,6 +2807,10 @@ class CondUnaryRSY<string mnemonic, bits<16> opcode,
   let mayLoad = 1;
   let AccessBytes = bytes;
   let CCMaskLast = 1;
+  let OpKey = mnemonic#"r"#cls;
+  let OpType = "mem";
+  let MemKey = mnemonic#cls;
+  let MemType = "target";
 }
 
 // Like CondUnaryRSY, but used for the raw assembly form.  The condition-code
@@ -3211,6 +3215,8 @@ class CondBinaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
   let CCMaskLast = 1;
   let NumOpsKey = !subst("loc", "sel", mnemonic);
   let NumOpsValue = "2";
+  let OpKey = mnemonic#cls1;
+  let OpType = "reg";
 }
 
 // Like CondBinaryRRF, but used for the raw assembly form.  The condition-code
@@ -3252,6 +3258,8 @@ class CondBinaryRRFa<string mnemonic, bits<16> opcode, RegisterOperand cls1,
   let CCMaskLast = 1;
   let NumOpsKey = mnemonic;
   let NumOpsValue = "3";
+  let OpKey = mnemonic#cls1;
+  let OpType = "reg";
 }
 
 // Like CondBinaryRRFa, but used for the raw assembly form.  The condition-code
@@ -4775,6 +4783,20 @@ class MemFoldPseudo<string mnemonic, RegisterOperand cls, bits<5> bytes,
     let hasNoSchedulingInfo = 1;
 }
 
+// Same as MemFoldPseudo but for Load On Condition with CC operands.
+class MemFoldPseudo_CondMove<string mnemonic, RegisterOperand cls, bits<5> bytes,
+                             AddressingMode mode>
+  : Pseudo<(outs cls:$R1),
+           (ins cls:$R2, mode:$XBD2, cond4:$valid, cond4:$M3), []> {
+    let OpKey = !subst("loc", "sel", mnemonic)#"r"#cls;
+    let OpType = "mem";
+    let MemKey = mnemonic#cls;
+    let MemType = "pseudo";
+    let mayLoad = 1;
+    let AccessBytes = bytes;
+    let hasNoSchedulingInfo = 1;
+}
+
 // Like CompareRI, but expanded after RA depending on the choice of register.
 class CompareRIPseudo<SDPatternOperator operator, RegisterOperand cls,
                       ImmOpWithPattern imm>
@@ -4813,6 +4835,8 @@ class CondBinaryRRFPseudo<string mnemonic, RegisterOperand cls1,
   let CCMaskLast = 1;
   let NumOpsKey = !subst("loc", "sel", mnemonic);
   let NumOpsValue = "2";
+  let OpKey = mnemonic#cls1;
+  let OpType = "reg";
 }
 
 // Like CondBinaryRRFa, but expanded after RA depending on the choice of
@@ -4826,6 +4850,8 @@ class CondBinaryRRFaPseudo<string mnemonic, RegisterOperand cls1,
   let CCMaskLast = 1;
   let NumOpsKey = mnemonic;
   let NumOpsValue = "3";
+  let OpKey = mnemonic#cls1;
+  let OpType = "reg";
 }
 
 // Like CondBinaryRIE, but expanded after RA depending on the choice of
@@ -4842,8 +4868,9 @@ class CondBinaryRIEPseudo<RegisterOperand cls, ImmOpWithPattern imm>
 
 // Like CondUnaryRSY, but expanded after RA depending on the choice of
 // register.
-class CondUnaryRSYPseudo<SDPatternOperator operator, RegisterOperand cls,
-                         bits<5> bytes, AddressingMode mode = bdaddr20only>
+class CondUnaryRSYPseudo<string mnemonic, SDPatternOperator operator,
+                         RegisterOperand cls, bits<5> bytes,
+                         AddressingMode mode = bdaddr20only>
   : Pseudo<(outs cls:$R1),
            (ins cls:$R1src, mode:$BD2, cond4:$valid, cond4:$R3),
            [(set cls:$R1,
@@ -4854,6 +4881,10 @@ class CondUnaryRSYPseudo<SDPatternOperator operator, RegisterOperand cls,
   let mayLoad = 1;
   let AccessBytes = bytes;
   let CCMaskLast = 1;
+  let OpKey = mnemonic#"r"#cls;
+  let OpType = "mem";
+  let MemKey = mnemonic#cls;
+  let MemType = "target";
 }
 
 // Like CondStoreRSY, but expanded after RA depending on the choice of
@@ -5066,6 +5097,22 @@ multiclass BinaryRXPairAndPseudo<string mnemonic, bits<8> rxOpcode,
   def _MemFoldPseudo : MemFoldPseudo<mnemonic, cls, bytes, bdxaddr12pair>;
 }
 
+multiclass CondUnaryRSYPairAndMemFold<string mnemonic, bits<16> opcode,
+                                      SDPatternOperator operator,
+                                      RegisterOperand cls, bits<5> bytes,
+                                      AddressingMode mode = bdaddr20only> {
+  defm "" : CondUnaryRSYPair<mnemonic, opcode, operator, cls, bytes, mode>;
+  def _MemFoldPseudo : MemFoldPseudo_CondMove<mnemonic, cls, bytes, mode>;
+}
+
+multiclass CondUnaryRSYPseudoAndMemFold<string mnemonic,
+                                        SDPatternOperator operator,
+                                        RegisterOperand cls, bits<5> bytes,
+                                        AddressingMode mode = bdaddr20only> {
+  def "" : CondUnaryRSYPseudo<mnemonic, operator, cls, bytes, mode>;
+  def _MemFoldPseudo : MemFoldPseudo_CondMove<mnemonic, cls, bytes, mode>;
+}
+
 // Define an instruction that operates on two fixed-length blocks of memory,
 // and associated pseudo instructions for operating on blocks of any size.
 // The Sequence form uses a straight-line sequence of instructions and

diff  --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 7161afe6f5e5..ee347ce65a6b 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -1150,14 +1150,31 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
   // commutable, try to change <INSN>R into <INSN>.
   unsigned NumOps = MI.getNumExplicitOperands();
   int MemOpcode = SystemZ::getMemOpcode(Opcode);
+  if (MemOpcode == -1)
+    return nullptr;
+
+  // Try to swap compare operands if possible.
+  bool NeedsCommute = false;
+  if ((MI.getOpcode() == SystemZ::CR || MI.getOpcode() == SystemZ::CGR ||
+       MI.getOpcode() == SystemZ::CLR || MI.getOpcode() == SystemZ::CLGR) &&
+      OpNum == 0 && prepareCompareSwapOperands(MI))
+    NeedsCommute = true;
+
+  bool CCOperands = false;
+  if (MI.getOpcode() == SystemZ::LOCRMux || MI.getOpcode() == SystemZ::LOCGR ||
+      MI.getOpcode() == SystemZ::SELRMux || MI.getOpcode() == SystemZ::SELGR) {
+    assert(MI.getNumOperands() == 6 && NumOps == 5 &&
+           "LOCR/SELR instruction operands corrupt?");
+    NumOps -= 2;
+    CCOperands = true;
+  }
 
   // See if this is a 3-address instruction that is convertible to 2-address
   // and suitable for folding below.  Only try this with virtual registers
   // and a provided VRM (during regalloc).
-  bool NeedsCommute = false;
-  if (SystemZ::getTwoOperandOpcode(Opcode) != -1 && MemOpcode != -1) {
+  if (SystemZ::getTwoOperandOpcode(Opcode) != -1) {
     if (VRM == nullptr)
-      MemOpcode = -1;
+      return nullptr;
     else {
       assert(NumOps == 3 && "Expected two source registers.");
       Register DstReg = MI.getOperand(0).getReg();
@@ -1172,32 +1189,42 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
           DstPhys == VRM->getPhys(SrcReg))
         NeedsCommute = (OpNum == 1);
       else
-        MemOpcode = -1;
+        return nullptr;
     }
   }
 
-  if (MemOpcode >= 0) {
-    if ((OpNum == NumOps - 1) || NeedsCommute) {
-      const MCInstrDesc &MemDesc = get(MemOpcode);
-      uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags);
-      assert(AccessBytes != 0 && "Size of access should be known");
-      assert(AccessBytes <= Size && "Access outside the frame index");
-      uint64_t Offset = Size - AccessBytes;
-      MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
-                                        MI.getDebugLoc(), get(MemOpcode));
+  if ((OpNum == NumOps - 1) || NeedsCommute) {
+    const MCInstrDesc &MemDesc = get(MemOpcode);
+    uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags);
+    assert(AccessBytes != 0 && "Size of access should be known");
+    assert(AccessBytes <= Size && "Access outside the frame index");
+    uint64_t Offset = Size - AccessBytes;
+    MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
+                                      MI.getDebugLoc(), get(MemOpcode));
+    if (MI.isCompare()) {
+      assert(NumOps == 2 && "Expected 2 register operands for a compare.");
+      MIB.add(MI.getOperand(NeedsCommute ? 1 : 0));
+    }
+    else {
       MIB.add(MI.getOperand(0));
       if (NeedsCommute)
         MIB.add(MI.getOperand(2));
       else
         for (unsigned I = 1; I < OpNum; ++I)
           MIB.add(MI.getOperand(I));
-      MIB.addFrameIndex(FrameIndex).addImm(Offset);
-      if (MemDesc.TSFlags & SystemZII::HasIndex)
-        MIB.addReg(0);
-      transferDeadCC(&MI, MIB);
-      transferMIFlag(&MI, MIB, MachineInstr::NoSWrap);
-      return MIB;
     }
+    MIB.addFrameIndex(FrameIndex).addImm(Offset);
+    if (MemDesc.TSFlags & SystemZII::HasIndex)
+      MIB.addReg(0);
+    if (CCOperands) {
+      unsigned CCValid = MI.getOperand(NumOps).getImm();
+      unsigned CCMask = MI.getOperand(NumOps + 1).getImm();
+      MIB.addImm(CCValid);
+      MIB.addImm(NeedsCommute ? CCMask ^ CCValid : CCMask);
+    }
+    transferDeadCC(&MI, MIB);
+    transferMIFlag(&MI, MIB, MachineInstr::NoSWrap);
+    return MIB;
   }
 
   return nullptr;
@@ -1706,6 +1733,56 @@ unsigned SystemZInstrInfo::getFusedCompare(unsigned Opcode,
   return 0;
 }
 
+bool SystemZInstrInfo::
+prepareCompareSwapOperands(MachineBasicBlock::iterator const MBBI) const {
+  assert(MBBI->isCompare() && MBBI->getOperand(0).isReg() &&
+         MBBI->getOperand(1).isReg() && !MBBI->mayLoad() &&
+         "Not a compare reg/reg.");
+
+  MachineBasicBlock *MBB = MBBI->getParent();
+  bool CCLive = true;
+  SmallVector<MachineInstr *, 4> CCUsers;
+  for (MachineBasicBlock::iterator Itr = std::next(MBBI);
+       Itr != MBB->end(); ++Itr) {
+    if (Itr->readsRegister(SystemZ::CC)) {
+      unsigned Flags = Itr->getDesc().TSFlags;
+      if ((Flags & SystemZII::CCMaskFirst) || (Flags & SystemZII::CCMaskLast))
+        CCUsers.push_back(&*Itr);
+      else
+        return false;
+    }
+    if (Itr->definesRegister(SystemZ::CC)) {
+      CCLive = false;
+      break;
+    }
+  }
+  if (CCLive) {
+    LivePhysRegs LiveRegs(*MBB->getParent()->getSubtarget().getRegisterInfo());
+    LiveRegs.addLiveOuts(*MBB);
+    if (LiveRegs.contains(SystemZ::CC))
+      return false;
+  }
+
+  // Update all CC users.
+  for (unsigned Idx = 0; Idx < CCUsers.size(); ++Idx) {
+    unsigned Flags = CCUsers[Idx]->getDesc().TSFlags;
+    unsigned FirstOpNum = ((Flags & SystemZII::CCMaskFirst) ?
+                           0 : CCUsers[Idx]->getNumExplicitOperands() - 2);
+    MachineOperand &CCMaskMO = CCUsers[Idx]->getOperand(FirstOpNum + 1);
+    unsigned NewCCMask = SystemZ::reverseCCMask(CCMaskMO.getImm());
+    CCMaskMO.setImm(NewCCMask);
+  }
+
+  return true;
+}
+
+unsigned SystemZ::reverseCCMask(unsigned CCMask) {
+  return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
+          (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
+          (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
+          (CCMask & SystemZ::CCMASK_CMP_UO));
+}
+
 unsigned SystemZInstrInfo::getLoadAndTrap(unsigned Opcode) const {
   if (!STI.hasLoadAndTrap())
     return 0;

diff  --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index 23d3d5352493..56541903ea70 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -155,6 +155,10 @@ enum FusedCompareType {
 namespace SystemZ {
 int getTwoOperandOpcode(uint16_t Opcode);
 int getTargetMemOpcode(uint16_t Opcode);
+
+// Return a version of comparison CC mask CCMask in which the LT and GT
+// actions are swapped.
+unsigned reverseCCMask(unsigned CCMask);
 }
 
 class SystemZInstrInfo : public SystemZGenInstrInfo {
@@ -314,6 +318,12 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
                            SystemZII::FusedCompareType Type,
                            const MachineInstr *MI = nullptr) const;
 
+  // Try to find all CC users of the compare instruction (MBBI) and update
+  // all of them to maintain equivalent behavior after swapping the compare
+  // operands. Return false if not all users can be conclusively found and
+  // handled. The compare instruction is *not* changed.
+  bool prepareCompareSwapOperands(MachineBasicBlock::iterator MBBI) const;
+
   // If Opcode is a LOAD opcode for with an associated LOAD AND TRAP
   // operation exists, returh the opcode for the latter, otherwise return 0.
   unsigned getLoadAndTrap(unsigned Opcode) const;

diff  --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 9579dcc0d1b6..fab6d267af87 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -492,7 +492,7 @@ let Predicates = [FeatureMiscellaneousExtensions3], Uses = [CC] in {
   let isCommutable = 1 in {
     // Expands to SELR or SELFHR or a branch-and-move sequence,
     // depending on the choice of registers.
-    def  SELRMux : CondBinaryRRFaPseudo<"selrmux", GRX32, GRX32, GRX32>;
+    def  SELRMux : CondBinaryRRFaPseudo<"MUXselr", GRX32, GRX32, GRX32>;
     defm SELFHR  : CondBinaryRRFaPair<"selfhr", 0xB9C0, GRH32, GRH32, GRH32>;
     defm SELR    : CondBinaryRRFaPair<"selr",   0xB9F0, GR32, GR32, GR32>;
     defm SELGR   : CondBinaryRRFaPair<"selgr",  0xB9E3, GR64, GR64, GR64>;
@@ -525,13 +525,13 @@ let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in {
   let isCommutable = 1 in {
     // Expands to LOCR or LOCFHR or a branch-and-move sequence,
     // depending on the choice of registers.
-    def LOCRMux : CondBinaryRRFPseudo<"locrmux", GRX32, GRX32>;
+    def LOCRMux : CondBinaryRRFPseudo<"MUXlocr", GRX32, GRX32>;
     defm LOCFHR : CondBinaryRRFPair<"locfhr", 0xB9E0, GRH32, GRH32>;
   }
 
   // Load on condition.  Matched via DAG pattern.
   // Expands to LOC or LOCFH, depending on the choice of register.
-  def LOCMux : CondUnaryRSYPseudo<simple_load, GRX32, 4>;
+  defm LOCMux : CondUnaryRSYPseudoAndMemFold<"MUXloc", simple_load, GRX32, 4>;
   defm LOCFH : CondUnaryRSYPair<"locfh", 0xEBE0, simple_load, GRH32, 4>;
 
   // Store on condition.  Expanded from CondStore* pseudos.
@@ -564,7 +564,7 @@ let Predicates = [FeatureLoadStoreOnCond], Uses = [CC] in {
 
   // Load on condition.  Matched via DAG pattern.
   defm LOC  : CondUnaryRSYPair<"loc",  0xEBF2, simple_load, GR32, 4>;
-  defm LOCG : CondUnaryRSYPair<"locg", 0xEBE2, simple_load, GR64, 8>;
+  defm LOCG : CondUnaryRSYPairAndMemFold<"locg", 0xEBE2, simple_load, GR64, 8>;
 
   // Store on condition.  Expanded from CondStore* pseudos.
   defm STOC  : CondStoreRSYPair<"stoc",  0xEBF3, GR32, 4>;

diff  --git a/llvm/test/CodeGen/SystemZ/cond-move-10.ll b/llvm/test/CodeGen/SystemZ/cond-move-10.ll
new file mode 100644
index 000000000000..eef9365390b5
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/cond-move-10.ll
@@ -0,0 +1,100 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s
+;
+; Test that a reload of a LOCGR/SELGR operand can be folded into a LOC
+; instruction.
+
+declare i64 @foo()
+declare i32 @foo32()
+
+; Check that conditional loads of spilled values can use LOCG rather than LOCGR.
+define void @f0(i64 *%ptr0, i64 *%dstPtr) {
+; CHECK-LABEL: f0:
+; CHECK: brasl %r14, foo at PLT
+; CHECK: locglh {{.*}}           # 8-byte Folded Reload
+; CHECK: br %r14
+  %ptr1 = getelementptr i64, i64 *%ptr0, i64 2
+  %ptr2 = getelementptr i64, i64 *%ptr0, i64 4
+  %ptr3 = getelementptr i64, i64 *%ptr0, i64 6
+  %ptr4 = getelementptr i64, i64 *%ptr0, i64 8
+  %ptr5 = getelementptr i64, i64 *%ptr0, i64 10
+  %ptr6 = getelementptr i64, i64 *%ptr0, i64 12
+  %ptr7 = getelementptr i64, i64 *%ptr0, i64 14
+  %ptr8 = getelementptr i64, i64 *%ptr0, i64 16
+  %ptr9 = getelementptr i64, i64 *%ptr0, i64 18
+
+  %val0 = load i64, i64 *%ptr0
+  %val1 = load i64, i64 *%ptr1
+  %val2 = load i64, i64 *%ptr2
+  %val3 = load i64, i64 *%ptr3
+  %val4 = load i64, i64 *%ptr4
+  %val5 = load i64, i64 *%ptr5
+  %val6 = load i64, i64 *%ptr6
+  %val7 = load i64, i64 *%ptr7
+  %val8 = load i64, i64 *%ptr8
+  %val9 = load i64, i64 *%ptr9
+
+  %ret = call i64 @foo()
+
+  %add0 = add i64 %ret, %val0
+  %add1 = add i64 %add0, %val1
+  %add2 = add i64 %add1, %val2
+  %add3 = add i64 %add2, %val3
+  %add4 = add i64 %add3, %val4
+  %add5 = add i64 %add4, %val5
+  %add6 = add i64 %add5, %val6
+  %add7 = add i64 %add6, %val7
+  %add8 = add i64 %add7, %val8
+
+  %cond = icmp eq i64 %add7, %add8
+  %res = select i1 %cond, i64 %add8, i64 %val9
+
+  store i64 %res, i64* %dstPtr
+  ret void
+}
+
+; Check that conditional loads of spilled values can use LOC rather than LOCR.
+define void @f1(i32 *%ptr0, i32 *%dstPtr) {
+; CHECK-LABEL: f1:
+; CHECK: brasl %r14, foo32 at PLT
+; CHECK: loclh {{.*}}            # 4-byte Folded Reload
+; CHECK: br %r14
+  %ptr1 = getelementptr i32, i32 *%ptr0, i32 2
+  %ptr2 = getelementptr i32, i32 *%ptr0, i32 4
+  %ptr3 = getelementptr i32, i32 *%ptr0, i32 6
+  %ptr4 = getelementptr i32, i32 *%ptr0, i32 8
+  %ptr5 = getelementptr i32, i32 *%ptr0, i32 10
+  %ptr6 = getelementptr i32, i32 *%ptr0, i32 12
+  %ptr7 = getelementptr i32, i32 *%ptr0, i32 14
+  %ptr8 = getelementptr i32, i32 *%ptr0, i32 16
+  %ptr9 = getelementptr i32, i32 *%ptr0, i32 18
+
+  %val0 = load i32, i32 *%ptr0
+  %val1 = load i32, i32 *%ptr1
+  %val2 = load i32, i32 *%ptr2
+  %val3 = load i32, i32 *%ptr3
+  %val4 = load i32, i32 *%ptr4
+  %val5 = load i32, i32 *%ptr5
+  %val6 = load i32, i32 *%ptr6
+  %val7 = load i32, i32 *%ptr7
+  %val8 = load i32, i32 *%ptr8
+  %val9 = load i32, i32 *%ptr9
+
+  %ret = call i32 @foo32()
+
+  %add0 = add i32 %ret, %val0
+  %add1 = add i32 %add0, %val1
+  %add2 = add i32 %add1, %val2
+  %add3 = add i32 %add2, %val3
+  %add4 = add i32 %add3, %val4
+  %add5 = add i32 %add4, %val5
+  %add6 = add i32 %add5, %val6
+  %add7 = add i32 %add6, %val7
+  %add8 = add i32 %add7, %val8
+
+  %cond = icmp eq i32 %add7, %add8
+  %res = select i1 %cond, i32 %add8, i32 %val9
+
+  store i32 %res, i32* %dstPtr
+  ret void
+}

diff  --git a/llvm/test/CodeGen/SystemZ/int-cmp-56.mir b/llvm/test/CodeGen/SystemZ/int-cmp-56.mir
new file mode 100644
index 000000000000..3a29e5f9091d
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-56.mir
@@ -0,0 +1,323 @@
+# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z14 -run-pass greedy %s -o - \
+# RUN:   | FileCheck %s
+#
+# Test that a reload can be folded into a compare instruction after swapping
+# operands (when the LHS register is spilled).
+
+--- |
+  declare i64 @foo()
+  define i64 @fun1(i64* %ptr0)  { ret i64 0 }
+  define i64 @fun2(i64* %ptr0)  { ret i64 0 }
+
+  declare i32 @foo32()
+  define i32 @fun3(i32* %ptr0)  { ret i32 0 }
+  define i32 @fun4(i32* %ptr0)  { ret i32 0 }
+...
+
+
+# Test CGR -> CG
+# CHECK: name:            fun1
+# CHECK: CG %10, %stack.0, 0, $noreg, implicit-def $cc :: (load 8 from %stack.0)
+# CHECK-NEXT: %12:gr64bit = LOCGHI %12, 8, 14, 12, implicit killed $cc
+---
+name:            fun1
+alignment:       16
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: addr64bit }
+  - { id: 1, class: gr64bit }
+  - { id: 2, class: gr64bit }
+  - { id: 3, class: gr64bit }
+  - { id: 4, class: gr64bit }
+  - { id: 5, class: gr64bit }
+  - { id: 6, class: gr64bit }
+  - { id: 7, class: gr64bit }
+  - { id: 8, class: gr64bit }
+  - { id: 9, class: gr64bit }
+  - { id: 10, class: gr64bit }
+  - { id: 11, class: gr64bit }
+  - { id: 12, class: gr64bit }
+  - { id: 13, class: gr64bit }
+  - { id: 14, class: gr64bit }
+  - { id: 15, class: gr64bit }
+  - { id: 16, class: gr64bit }
+  - { id: 17, class: gr64bit }
+  - { id: 18, class: gr64bit }
+  - { id: 19, class: gr64bit }
+liveins:
+  - { reg: '$r2d', virtual-reg: '%0' }
+frameInfo:
+  maxAlignment:    1
+  hasCalls:        true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $r2d
+  
+    %0:addr64bit = COPY $r2d
+    %1:gr64bit = LG %0, 0, $noreg
+    %2:gr64bit = LG %0, 16, $noreg
+    %3:gr64bit = LG %0, 32, $noreg
+    %4:gr64bit = LG %0, 48, $noreg
+    %5:gr64bit = LG %0, 64, $noreg
+    %6:gr64bit = LG %0, 80, $noreg
+    %7:gr64bit = LG %0, 96, $noreg
+    %8:gr64bit = LG %0, 112, $noreg
+    %9:gr64bit = LG %0, 128, $noreg
+    ADJCALLSTACKDOWN 0, 0
+    CallBRASL @foo, csr_systemz, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2d
+    %10:gr64bit = COPY $r2d
+    ADJCALLSTACKUP 0, 0
+    CGR %10, %1, implicit-def $cc
+    %12:gr64bit = COPY %10
+    %12:gr64bit = LOCGHI %12, 0, 14, 10, implicit killed $cc
+    CGR %10, %2, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 1, 14, 10, implicit killed $cc
+    CGR %10, %3, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 2, 14, 10, implicit killed $cc
+    CGR %10, %4, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 3, 14, 10, implicit killed $cc
+    CGR %10, %5, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 4, 14, 10, implicit killed $cc
+    CGR %10, %6, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 5, 14, 10, implicit killed $cc
+    CGR %10, %7, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 6, 14, 10, implicit killed $cc
+    CGR %10, %8, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 7, 14, 10, implicit killed $cc
+    CGR %9, %10, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 8, 14, 10, implicit killed $cc
+    $r2d = COPY %12
+    Return implicit $r2d
+...
+
+
+# Test CLGR -> CLG
+# CHECK: name:            fun2
+# CHECK: CLG %10, %stack.0, 0, $noreg, implicit-def $cc :: (load 8 from %stack.0)
+# CHECK-NEXT: %12:gr64bit = LOCGHI %12, 8, 14, 12, implicit killed $cc
+---
+name:            fun2
+alignment:       16
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: addr64bit }
+  - { id: 1, class: gr64bit }
+  - { id: 2, class: gr64bit }
+  - { id: 3, class: gr64bit }
+  - { id: 4, class: gr64bit }
+  - { id: 5, class: gr64bit }
+  - { id: 6, class: gr64bit }
+  - { id: 7, class: gr64bit }
+  - { id: 8, class: gr64bit }
+  - { id: 9, class: gr64bit }
+  - { id: 10, class: gr64bit }
+  - { id: 11, class: gr64bit }
+  - { id: 12, class: gr64bit }
+  - { id: 13, class: gr64bit }
+  - { id: 14, class: gr64bit }
+  - { id: 15, class: gr64bit }
+  - { id: 16, class: gr64bit }
+  - { id: 17, class: gr64bit }
+  - { id: 18, class: gr64bit }
+  - { id: 19, class: gr64bit }
+liveins:
+  - { reg: '$r2d', virtual-reg: '%0' }
+frameInfo:
+  maxAlignment:    1
+  hasCalls:        true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $r2d
+  
+    %0:addr64bit = COPY $r2d
+    %1:gr64bit = LG %0, 0, $noreg
+    %2:gr64bit = LG %0, 16, $noreg
+    %3:gr64bit = LG %0, 32, $noreg
+    %4:gr64bit = LG %0, 48, $noreg
+    %5:gr64bit = LG %0, 64, $noreg
+    %6:gr64bit = LG %0, 80, $noreg
+    %7:gr64bit = LG %0, 96, $noreg
+    %8:gr64bit = LG %0, 112, $noreg
+    %9:gr64bit = LG %0, 128, $noreg
+    ADJCALLSTACKDOWN 0, 0
+    CallBRASL @foo, csr_systemz, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2d
+    %10:gr64bit = COPY $r2d
+    ADJCALLSTACKUP 0, 0
+    CGR %10, %1, implicit-def $cc
+    %12:gr64bit = COPY %10
+    %12:gr64bit = LOCGHI %12, 0, 14, 10, implicit killed $cc
+    CGR %10, %2, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 1, 14, 10, implicit killed $cc
+    CGR %10, %3, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 2, 14, 10, implicit killed $cc
+    CGR %10, %4, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 3, 14, 10, implicit killed $cc
+    CGR %10, %5, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 4, 14, 10, implicit killed $cc
+    CGR %10, %6, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 5, 14, 10, implicit killed $cc
+    CGR %10, %7, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 6, 14, 10, implicit killed $cc
+    CGR %10, %8, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 7, 14, 10, implicit killed $cc
+    CLGR %9, %10, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 8, 14, 10, implicit killed $cc
+    $r2d = COPY %12
+    Return implicit $r2d
+...
+
+
+# Test CR -> C
+# CHECK: name:            fun3
+# CHECK: C %10, %stack.0, 0, $noreg, implicit-def $cc :: (load 4 from %stack.0)
+# CHECK: %12:gr32bit = LOCHIMux %12, 8, 14, 12, implicit killed $cc
+---
+name:            fun3
+alignment:       16
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: addr64bit }
+  - { id: 1, class: gr32bit }
+  - { id: 2, class: gr32bit }
+  - { id: 3, class: gr32bit }
+  - { id: 4, class: gr32bit }
+  - { id: 5, class: gr32bit }
+  - { id: 6, class: gr32bit }
+  - { id: 7, class: gr32bit }
+  - { id: 8, class: gr32bit }
+  - { id: 9, class: gr32bit }
+  - { id: 10, class: gr32bit }
+  - { id: 11, class: gr32bit }
+  - { id: 12, class: gr32bit }
+  - { id: 13, class: gr32bit }
+  - { id: 14, class: gr32bit }
+  - { id: 15, class: gr32bit }
+  - { id: 16, class: gr32bit }
+  - { id: 17, class: gr32bit }
+  - { id: 18, class: gr32bit }
+  - { id: 19, class: gr32bit }
+liveins:
+  - { reg: '$r2d', virtual-reg: '%0' }
+frameInfo:
+  maxAlignment:    1
+  hasCalls:        true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $r2d
+  
+    %0:addr64bit = COPY $r2d
+    %1:gr32bit = LMux %0, 0, $noreg
+    %2:gr32bit = LMux %0, 8, $noreg
+    %3:gr32bit = LMux %0, 16, $noreg
+    %4:gr32bit = LMux %0, 24, $noreg
+    %5:gr32bit = LMux %0, 32, $noreg
+    %6:gr32bit = LMux %0, 40, $noreg
+    %7:gr32bit = LMux %0, 48, $noreg
+    %8:gr32bit = LMux %0, 56, $noreg
+    %9:gr32bit = LMux %0, 64, $noreg
+    ADJCALLSTACKDOWN 0, 0
+    CallBRASL @foo, csr_systemz, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2l
+    %10:gr32bit = COPY $r2l
+    ADJCALLSTACKUP 0, 0
+    CR %10, %1, implicit-def $cc
+    %12:gr32bit = COPY %10
+    %12:gr32bit = LOCHIMux %12, 0, 14, 10, implicit killed $cc
+    CR %10, %2, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 1, 14, 10, implicit killed $cc
+    CR %10, %3, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 2, 14, 10, implicit killed $cc
+    CR %10, %4, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 3, 14, 10, implicit killed $cc
+    CR %10, %5, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 4, 14, 10, implicit killed $cc
+    CR %10, %6, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 5, 14, 10, implicit killed $cc
+    CR %10, %7, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 6, 14, 10, implicit killed $cc
+    CR %10, %8, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 7, 14, 10, implicit killed $cc
+    CR %9, %10, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 8, 14, 10, implicit killed $cc
+    $r2l = COPY %12
+    Return implicit $r2l
+...
+
+
+# Test CLR -> CL
+# CHECK: name:            fun4
+# CHECK: CL %10, %stack.0, 0, $noreg, implicit-def $cc :: (load 4 from %stack.0)
+# CHECK: %12:gr32bit = LOCHIMux %12, 8, 14, 12, implicit killed $cc
+---
+name:            fun4
+alignment:       16
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: addr64bit }
+  - { id: 1, class: gr32bit }
+  - { id: 2, class: gr32bit }
+  - { id: 3, class: gr32bit }
+  - { id: 4, class: gr32bit }
+  - { id: 5, class: gr32bit }
+  - { id: 6, class: gr32bit }
+  - { id: 7, class: gr32bit }
+  - { id: 8, class: gr32bit }
+  - { id: 9, class: gr32bit }
+  - { id: 10, class: gr32bit }
+  - { id: 11, class: gr32bit }
+  - { id: 12, class: gr32bit }
+  - { id: 13, class: gr32bit }
+  - { id: 14, class: gr32bit }
+  - { id: 15, class: gr32bit }
+  - { id: 16, class: gr32bit }
+  - { id: 17, class: gr32bit }
+  - { id: 18, class: gr32bit }
+  - { id: 19, class: gr32bit }
+liveins:
+  - { reg: '$r2d', virtual-reg: '%0' }
+frameInfo:
+  maxAlignment:    1
+  hasCalls:        true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $r2d
+  
+    %0:addr64bit = COPY $r2d
+    %1:gr32bit = LMux %0, 0, $noreg
+    %2:gr32bit = LMux %0, 8, $noreg
+    %3:gr32bit = LMux %0, 16, $noreg
+    %4:gr32bit = LMux %0, 24, $noreg
+    %5:gr32bit = LMux %0, 32, $noreg
+    %6:gr32bit = LMux %0, 40, $noreg
+    %7:gr32bit = LMux %0, 48, $noreg
+    %8:gr32bit = LMux %0, 56, $noreg
+    %9:gr32bit = LMux %0, 64, $noreg
+    ADJCALLSTACKDOWN 0, 0
+    CallBRASL @foo, csr_systemz, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2l
+    %10:gr32bit = COPY $r2l
+    ADJCALLSTACKUP 0, 0
+    CR %10, %1, implicit-def $cc
+    %12:gr32bit = COPY %10
+    %12:gr32bit = LOCHIMux %12, 0, 14, 10, implicit killed $cc
+    CR %10, %2, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 1, 14, 10, implicit killed $cc
+    CR %10, %3, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 2, 14, 10, implicit killed $cc
+    CR %10, %4, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 3, 14, 10, implicit killed $cc
+    CR %10, %5, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 4, 14, 10, implicit killed $cc
+    CR %10, %6, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 5, 14, 10, implicit killed $cc
+    CR %10, %7, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 6, 14, 10, implicit killed $cc
+    CR %10, %8, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 7, 14, 10, implicit killed $cc
+    CLR %9, %10, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 8, 14, 10, implicit killed $cc
+    $r2l = COPY %12
+    Return implicit $r2l
+...


        


More information about the llvm-commits mailing list