[llvm] 7a4e9a0 - [SystemZ] Implement memcmp of variable length with CLC.
Jonas Paulsson via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 5 09:21:51 PDT 2021
Author: Jonas Paulsson
Date: 2021-10-05T18:20:36+02:00
New Revision: 7a4e9a0c73667cb80e4572d41535a9e48f1ed9ef
URL: https://github.com/llvm/llvm-project/commit/7a4e9a0c73667cb80e4572d41535a9e48f1ed9ef
DIFF: https://github.com/llvm/llvm-project/commit/7a4e9a0c73667cb80e4572d41535a9e48f1ed9ef.diff
LOG: [SystemZ] Implement memcmp of variable length with CLC.
Following the same pattern of memset/memcpy, this patch implements a variable
length memcmp with a CLC loop followed by an EXRL instruction.
Review: Ulrich Weigand
Differential Revision: https://reviews.llvm.org/D107380
Added:
Modified:
llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
llvm/lib/Target/SystemZ/SystemZInstrFormats.td
llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
llvm/test/CodeGen/SystemZ/memcmp-01.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 8b649a7d7983..4b06fc0b6c3d 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -7836,9 +7836,11 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
// When generating more than one CLC, all but the last will need to
// branch to the end when a difference is found.
- MachineBasicBlock *EndMBB = (ImmLength > 256 && Opcode == SystemZ::CLC
- ? SystemZ::splitBlockAfter(MI, MBB)
- : nullptr);
+ MachineBasicBlock *EndMBB =
+ (Opcode == SystemZ::CLC &&
+ (ImmLength > 256 || LenMinus1Reg != SystemZ::NoRegister)
+ ? SystemZ::splitBlockAfter(MI, MBB)
+ : nullptr);
// Check for the loop form, in which operand 5 is the trip count.
if (MI.getNumExplicitOperands() > 5) {
@@ -7880,8 +7882,8 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
StartMBB = SystemZ::emitBlockAfter(MBB);
LoopMBB = SystemZ::emitBlockAfter(StartMBB);
- NextMBB = LoopMBB;
- DoneMBB = SystemZ::emitBlockAfter(LoopMBB);
+ NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
+ DoneMBB = SystemZ::emitBlockAfter(NextMBB);
// MBB:
// # Jump to AllDoneMBB if LenMinus1Reg is -1, or fall thru to StartMBB.
@@ -8000,19 +8002,24 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
: MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
.addReg(StartDestReg).addMBB(StartMBB)
- .addReg(NextDestReg).addMBB(LoopMBB);
+ .addReg(NextDestReg).addMBB(NextMBB);
if (!HaveSingleBase)
BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
.addReg(StartSrcReg).addMBB(StartMBB)
- .addReg(NextSrcReg).addMBB(LoopMBB);
+ .addReg(NextSrcReg).addMBB(NextMBB);
MRI.constrainRegClass(LenMinus1Reg, &SystemZ::ADDR64BitRegClass);
- BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
- .addImm(Opcode)
- .addReg(LenMinus1Reg)
- .addReg(RemDestReg).addImm(DestDisp)
- .addReg(RemSrcReg).addImm(SrcDisp);
+ MachineInstrBuilder EXRL_MIB =
+ BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
+ .addImm(Opcode)
+ .addReg(LenMinus1Reg)
+ .addReg(RemDestReg).addImm(DestDisp)
+ .addReg(RemSrcReg).addImm(SrcDisp);
MBB->addSuccessor(AllDoneMBB);
MBB = AllDoneMBB;
+ if (EndMBB) {
+ EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
+ MBB->addLiveIn(SystemZ::CC);
+ }
}
}
@@ -8546,6 +8553,7 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
return emitMemMemWrapper(MI, MBB, SystemZ::XC);
case SystemZ::CLCSequence:
case SystemZ::CLCLoop:
+ case SystemZ::CLCLoopVarLen:
return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
case SystemZ::CLSTLoop:
return emitStringWrapper(MI, MBB, SystemZ::CLST);
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
index 5cb46cdb36a6..547abad5f258 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -5365,6 +5365,10 @@ multiclass CompareMemorySS<string mnemonic, bits<8> opcode,
imm64:$length, GR64:$count256),
[(set CC, (loop bdaddr12only:$dest, bdaddr12only:$src,
imm64:$length, GR64:$count256))]>;
+ def LoopVarLen : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+ GR64:$length, GR64:$count256),
+ [(set CC, (loop bdaddr12only:$dest, bdaddr12only:$src,
+ GR64:$length, GR64:$count256))]>;
}
}
diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index aad0180a2924..578359529dc8 100644
--- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -53,7 +53,10 @@ static SDValue emitMemMemVarLen(SelectionDAG &DAG, const SDLoc &DL,
DAG.getConstant(-1, DL, MVT::i64));
SDValue TripC = DAG.getNode(ISD::SRL, DL, MVT::i64, LenMinus1,
DAG.getConstant(8, DL, MVT::i64));
- return DAG.getNode(Loop, DL, MVT::Other, Chain, Dst, Src, LenMinus1, TripC);
+ SDVTList VTs = Loop == SystemZISD::CLC_LOOP
+ ? DAG.getVTList(MVT::i32, MVT::Other)
+ : DAG.getVTList(MVT::Other);
+ return DAG.getNode(Loop, DL, VTs, Chain, Dst, Src, LenMinus1, TripC);
}
SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemcpy(
@@ -199,15 +202,17 @@ std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemcmp(
SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1,
SDValue Src2, SDValue Size, MachinePointerInfo Op1PtrInfo,
MachinePointerInfo Op2PtrInfo) const {
+ SDValue CCReg;
+ // Swap operands to invert CC == 1 vs. CC == 2 cases.
if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) {
uint64_t Bytes = CSize->getZExtValue();
assert(Bytes > 0 && "Caller should have handled 0-size case");
- // Swap operands to invert CC == 1 vs. CC == 2 cases.
- SDValue CCReg = emitCLC(DAG, DL, Chain, Src2, Src1, Bytes);
- Chain = CCReg.getValue(1);
- return std::make_pair(addIPMSequence(DL, CCReg, DAG), Chain);
- }
- return std::make_pair(SDValue(), SDValue());
+ CCReg = emitCLC(DAG, DL, Chain, Src2, Src1, Bytes);
+ } else
+ CCReg = emitMemMemVarLen(DAG, DL, SystemZISD::CLC_LOOP, Chain, Src2, Src1,
+ Size);
+ Chain = CCReg.getValue(1);
+ return std::make_pair(addIPMSequence(DL, CCReg, DAG), Chain);
}
std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemchr(
diff --git a/llvm/test/CodeGen/SystemZ/memcmp-01.ll b/llvm/test/CodeGen/SystemZ/memcmp-01.ll
index 740a86750dd8..ccc89283a5b1 100644
--- a/llvm/test/CodeGen/SystemZ/memcmp-01.ll
+++ b/llvm/test/CodeGen/SystemZ/memcmp-01.ll
@@ -219,3 +219,30 @@ define i32 @f13(i8 *%src1, i8 *%src2) {
%res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 769)
ret i32 %res
}
+
+define i32 @f14(i8 *%src1, i8 *%src2, i64 %Len) {
+; CHECK-LABEL: f14:
+; CHECK: # %bb.0:
+; CHECK-NEXT: aghi %r4, -1
+; CHECK-NEXT: cghi %r4, -1
+; CHECK-NEXT: je .LBB13_5
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: srlg %r0, %r4, 8
+; CHECK-NEXT: cgije %r0, 0, .LBB13_4
+; CHECK-NEXT: .LBB13_2: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: clc 0(256,%r3), 0(%r2)
+; CHECK-NEXT: jlh .LBB13_5
+; CHECK-NEXT: # %bb.3: # in Loop: Header=BB13_2 Depth=1
+; CHECK-NEXT: la %r3, 256(%r3)
+; CHECK-NEXT: la %r2, 256(%r2)
+; CHECK-NEXT: brctg %r0, .LBB13_2
+; CHECK-NEXT: .LBB13_4:
+; CHECK-NEXT: exrl %r4, .Ltmp0
+; CHECK-NEXT: .LBB13_5:
+; CHECK-NEXT: ipm %r2
+; CHECK-NEXT: sll %r2, 2
+; CHECK-NEXT: sra %r2, 30
+; CHECK-NEXT: br %r14
+ %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 %Len)
+ ret i32 %res
+}
More information about the llvm-commits mailing list