[llvm] 5dc339d - [AArch64][GlobalISel] Fold 64-bit cmps with 64-bit adds
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 21 13:55:48 PDT 2021
Author: Jessica Paquette
Date: 2021-10-21T13:51:44-07:00
New Revision: 5dc339d9825f1dbe788cfb69c88210a59bbf8e3a
URL: https://github.com/llvm/llvm-project/commit/5dc339d9825f1dbe788cfb69c88210a59bbf8e3a
DIFF: https://github.com/llvm/llvm-project/commit/5dc339d9825f1dbe788cfb69c88210a59bbf8e3a.diff
LOG: [AArch64][GlobalISel] Fold 64-bit cmps with 64-bit adds
G_ICMP is selected to a flag-setting arithmetic op (ADDS/SUBS/etc.) with a dead
destination plus a CSINC instruction.
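For illustration, here is roughly what that selection looks like for a 64-bit
equality compare against 42 (mirroring the cmp_imm_64 test in select-cmp.mir
below; register numbers are just a sketch):

    %5:gpr(s32) = G_ICMP intpred(eq), %0(s64), %1

    ; selects to:
    %2:gpr64 = SUBSXri %0, 42, 0, implicit-def $nzcv   ; result is dead, only $nzcv is used
    %5:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv   ; materialize 0/1 from the flags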
We already have a fold that combines 32-bit adds with G_ICMP.
The problem with G_ICMP is that we model it as always having a 32-bit
destination even when it is a 64-bit operation, so we were missing some
opportunities for 64-bit folds.
This patch teaches the fold to recognize 64-bit G_ICMPs and refactors some of
the code surrounding CSINC accordingly.
(Later down the line, I think we should probably change the way we handle G_ICMP
in general.)
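As a sketch of the intended effect (this mirrors the new cmp_add_rhs_64 and
cmp_add_rhs_64_zext_multi_use tests below), a 64-bit add of a zero-extended
compare:

    %cmp:gpr(s32) = G_ICMP intpred(eq), %cmp_lhs(s64), %cmp_rhs
    %cmp_ext:gpr(s64) = G_ZEXT %cmp
    %add:gpr(s64) = G_ADD %cmp_ext, %add_rhs

previously selected to a separate CSINC, zero-extension, and add:

    %subs:gpr64 = SUBSXrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
    %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
    %cmp_ext:gpr64 = SUBREG_TO_REG 0, %cmp, %subreg.sub_32
    %add:gpr64 = ADDXrr %cmp_ext, %add_rhs

and now folds into the compare via a 64-bit CSINC of the add's other operand:

    %subs:gpr64 = SUBSXrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
    %add:gpr64 = CSINCXr %add_rhs, %add_rhs, 1, implicit $nzcv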
Differential Revision: https://reviews.llvm.org/D111088
Added:
Modified:
llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 70e2610162efe..381f50f62e9b8 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -276,13 +276,9 @@ class AArch64InstructionSelector : public InstructionSelector {
const RegisterBank &DstRB, LLT ScalarTy,
Register VecReg, unsigned LaneIdx,
MachineIRBuilder &MIRBuilder) const;
-
- /// Emit a CSet for an integer compare.
- ///
- /// \p DefReg and \p SrcReg are expected to be 32-bit scalar registers.
- MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
- MachineIRBuilder &MIRBuilder,
- Register SrcReg = AArch64::WZR) const;
+ MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
+ AArch64CC::CondCode Pred,
+ MachineIRBuilder &MIRBuilder) const;
/// Emit a CSet for a FP compare.
///
/// \p Dst is expected to be a 32-bit scalar register.
@@ -2213,27 +2209,55 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
// fold the add into the cset for the cmp by using cinc.
//
// FIXME: This would probably be a lot nicer in PostLegalizerLowering.
- Register X = I.getOperand(1).getReg();
-
- // Only handle scalars. Scalar G_ICMP is only legal for s32, so bail out
- // early if we see it.
- LLT Ty = MRI.getType(X);
- if (Ty.isVector() || Ty.getSizeInBits() != 32)
+ Register AddDst = I.getOperand(0).getReg();
+ Register AddLHS = I.getOperand(1).getReg();
+ Register AddRHS = I.getOperand(2).getReg();
+ // Only handle scalars.
+ LLT Ty = MRI.getType(AddLHS);
+ if (Ty.isVector())
return false;
-
- Register CmpReg = I.getOperand(2).getReg();
- MachineInstr *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI);
+ // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
+ // bits.
+ unsigned Size = Ty.getSizeInBits();
+ if (Size != 32 && Size != 64)
+ return false;
+ auto MatchCmp = [&](Register Reg) -> MachineInstr * {
+ if (!MRI.hasOneNonDBGUse(Reg))
+ return nullptr;
+ // If the LHS of the add is 32 bits, then we want to fold a 32-bit
+ // compare.
+ if (Size == 32)
+ return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
+ // We model scalar compares using 32-bit destinations right now.
+ // If it's a 64-bit compare, it'll have 64-bit sources.
+ Register ZExt;
+ if (!mi_match(Reg, MRI,
+ m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
+ return nullptr;
+ auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
+ if (!Cmp ||
+ MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
+ return nullptr;
+ return Cmp;
+ };
+ // Try to match
+ // z + (cmp pred, x, y)
+ MachineInstr *Cmp = MatchCmp(AddRHS);
if (!Cmp) {
- std::swap(X, CmpReg);
- Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI);
+ // (cmp pred, x, y) + z
+ std::swap(AddLHS, AddRHS);
+ Cmp = MatchCmp(AddRHS);
if (!Cmp)
return false;
}
- auto Pred =
- static_cast<CmpInst::Predicate>(Cmp->getOperand(1).getPredicate());
- emitIntegerCompare(Cmp->getOperand(2), Cmp->getOperand(3),
- Cmp->getOperand(1), MIB);
- emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB, X);
+ auto &PredOp = Cmp->getOperand(1);
+ auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
+ const AArch64CC::CondCode InvCC =
+ changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
+ MIB.setInstrAndDebugLoc(I);
+ emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
+ /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
+ emitCSINC(/*Dst=*/AddDst, /*Src1=*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
I.eraseFromParent();
return true;
}
@@ -2963,10 +2987,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// false, so to get the increment when it's true, we need to use the
// inverse. In this case, we want to increment when carry is set.
Register ZReg = AArch64::WZR;
- auto CsetMI = MIB.buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
- {ZReg, ZReg})
- .addImm(getInvertedCondCode(OpAndCC.second));
- constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
+ emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg,
+ getInvertedCondCode(OpAndCC.second), MIB);
I.eraseFromParent();
return true;
}
@@ -3303,9 +3325,11 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
- emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
- MIB);
- emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB);
+ const AArch64CC::CondCode InvCC =
+ changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
+ emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
+ emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
+ /*Src2=*/AArch64::WZR, InvCC, MIB);
I.eraseFromParent();
return true;
}
@@ -4451,25 +4475,19 @@ MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
"Expected a 32-bit scalar register?");
#endif
- const Register ZeroReg = AArch64::WZR;
- auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) {
- auto CSet =
- MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg})
- .addImm(getInvertedCondCode(CC));
- constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI);
- return &*CSet;
- };
-
+ const Register ZReg = AArch64::WZR;
AArch64CC::CondCode CC1, CC2;
changeFCMPPredToAArch64CC(Pred, CC1, CC2);
+ auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
if (CC2 == AArch64CC::AL)
- return EmitCSet(Dst, CC1);
-
+ return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
+ MIRBuilder);
const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
Register Def1Reg = MRI.createVirtualRegister(RC);
Register Def2Reg = MRI.createVirtualRegister(RC);
- EmitCSet(Def1Reg, CC1);
- EmitCSet(Def2Reg, CC2);
+ auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
+ emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
+ emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
return &*OrMI;
@@ -4578,16 +4596,25 @@ MachineInstr *AArch64InstructionSelector::emitVectorConcat(
}
MachineInstr *
-AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
- MachineIRBuilder &MIRBuilder,
- Register SrcReg) const {
- // CSINC increments the result when the predicate is false. Invert it.
- const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
- CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
- auto I = MIRBuilder.buildInstr(AArch64::CSINCWr, {DefReg}, {SrcReg, SrcReg})
- .addImm(InvCC);
- constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
- return &*I;
+AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
+ Register Src2, AArch64CC::CondCode Pred,
+ MachineIRBuilder &MIRBuilder) const {
+ auto &MRI = *MIRBuilder.getMRI();
+ const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
+ // If we used a register class, then this won't necessarily have an LLT.
+ // Compute the size based off whether or not we have a class or bank.
+ unsigned Size;
+ if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
+ Size = TRI.getRegSizeInBits(*RC);
+ else
+ Size = MRI.getType(Dst).getSizeInBits();
+ // Some opcodes use s1.
+ assert(Size <= 64 && "Expected 64 bits or less only!");
+ static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
+ unsigned Opc = OpcTable[Size == 64];
+ auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
+ constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
+ return &*CSINC;
}
std::pair<MachineInstr *, AArch64CC::CondCode>
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir
index 6ffc9210649f2..8587c40340aa4 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir
@@ -11,11 +11,12 @@ body: |
; CHECK-LABEL: name: cmp_imm_32
; CHECK: liveins: $w0
- ; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0
- ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 42, 0, implicit-def $nzcv
- ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
- ; CHECK: $w0 = COPY [[CSINCWr]]
- ; CHECK: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0
+ ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 42, 0, implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+ ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:gpr(s32) = COPY $w0
%1:gpr(s32) = G_CONSTANT i32 42
%5:gpr(s32) = G_ICMP intpred(eq), %0(s32), %1
@@ -34,11 +35,12 @@ body: |
; CHECK-LABEL: name: cmp_imm_64
; CHECK: liveins: $x0
- ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
- ; CHECK: [[SUBSXri:%[0-9]+]]:gpr64 = SUBSXri [[COPY]], 42, 0, implicit-def $nzcv
- ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
- ; CHECK: $w0 = COPY [[CSINCWr]]
- ; CHECK: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-NEXT: [[SUBSXri:%[0-9]+]]:gpr64 = SUBSXri [[COPY]], 42, 0, implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+ ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:gpr(s64) = COPY $x0
%1:gpr(s64) = G_CONSTANT i64 42
%5:gpr(s32) = G_ICMP intpred(eq), %0(s64), %1
@@ -57,13 +59,14 @@ body: |
; CHECK-LABEL: name: cmp_imm_out_of_range
; CHECK: liveins: $x0
- ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
- ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 13132
- ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
- ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[SUBREG_TO_REG]], implicit-def $nzcv
- ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
- ; CHECK: $w0 = COPY [[CSINCWr]]
- ; CHECK: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 13132
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
+ ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[SUBREG_TO_REG]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+ ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:gpr(s64) = COPY $x0
%1:gpr(s64) = G_CONSTANT i64 13132
%5:gpr(s32) = G_ICMP intpred(eq), %0(s64), %1
@@ -81,11 +84,12 @@ body: |
liveins: $w0
; CHECK-LABEL: name: cmp_imm_lookthrough
; CHECK: liveins: $w0
- ; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0
- ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 42, 0, implicit-def $nzcv
- ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
- ; CHECK: $w0 = COPY [[CSINCWr]]
- ; CHECK: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0
+ ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 42, 0, implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+ ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:gpr(s32) = COPY $w0
%1:gpr(s64) = G_CONSTANT i64 42
%2:gpr(s32) = G_TRUNC %1(s64)
@@ -104,11 +108,12 @@ body: |
liveins: $w0
; CHECK-LABEL: name: cmp_imm_lookthrough_bad_trunc
; CHECK: liveins: $w0
- ; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0
- ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 0, 0, implicit-def $nzcv
- ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
- ; CHECK: $w0 = COPY [[CSINCWr]]
- ; CHECK: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0
+ ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+ ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:gpr(s32) = COPY $w0
%1:gpr(s64) = G_CONSTANT i64 68719476736 ; 0x1000000000
%2:gpr(s32) = G_TRUNC %1(s64) ; Value truncates to 0
@@ -127,11 +132,12 @@ body: |
liveins: $w0
; CHECK-LABEL: name: cmp_neg_imm_32
; CHECK: liveins: $w0
- ; CHECK: %reg0:gpr32sp = COPY $w0
- ; CHECK: [[ADDSWri:%[0-9]+]]:gpr32 = ADDSWri %reg0, 10, 0, implicit-def $nzcv
- ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
- ; CHECK: $w0 = COPY %cmp
- ; CHECK: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %reg0:gpr32sp = COPY $w0
+ ; CHECK-NEXT: [[ADDSWri:%[0-9]+]]:gpr32 = ADDSWri %reg0, 10, 0, implicit-def $nzcv
+ ; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+ ; CHECK-NEXT: $w0 = COPY %cmp
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%reg0:gpr(s32) = COPY $w0
%cst:gpr(s32) = G_CONSTANT i32 -10
%cmp:gpr(s32) = G_ICMP intpred(eq), %reg0(s32), %cst
@@ -149,11 +155,12 @@ body: |
liveins: $x0
; CHECK-LABEL: name: cmp_neg_imm_64
; CHECK: liveins: $x0
- ; CHECK: %reg0:gpr64sp = COPY $x0
- ; CHECK: [[ADDSXri:%[0-9]+]]:gpr64 = ADDSXri %reg0, 10, 0, implicit-def $nzcv
- ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
- ; CHECK: $w0 = COPY %cmp
- ; CHECK: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %reg0:gpr64sp = COPY $x0
+ ; CHECK-NEXT: [[ADDSXri:%[0-9]+]]:gpr64 = ADDSXri %reg0, 10, 0, implicit-def $nzcv
+ ; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+ ; CHECK-NEXT: $w0 = COPY %cmp
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%reg0:gpr(s64) = COPY $x0
%cst:gpr(s64) = G_CONSTANT i64 -10
%cmp:gpr(s32) = G_ICMP intpred(eq), %reg0(s64), %cst
@@ -171,12 +178,13 @@ body: |
liveins: $w0
; CHECK-LABEL: name: cmp_neg_imm_invalid
; CHECK: liveins: $w0
- ; CHECK: %reg0:gpr32 = COPY $w0
- ; CHECK: %cst:gpr32 = MOVi32imm -5000
- ; CHECK: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %reg0, %cst, implicit-def $nzcv
- ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
- ; CHECK: $w0 = COPY %cmp
- ; CHECK: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %reg0:gpr32 = COPY $w0
+ ; CHECK-NEXT: %cst:gpr32 = MOVi32imm -5000
+ ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %reg0, %cst, implicit-def $nzcv
+ ; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+ ; CHECK-NEXT: $w0 = COPY %cmp
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%reg0:gpr(s32) = COPY $w0
%cst:gpr(s32) = G_CONSTANT i32 -5000
%cmp:gpr(s32) = G_ICMP intpred(eq), %reg0(s32), %cst
@@ -194,12 +202,13 @@ body: |
; CHECK-LABEL: name: cmp_arith_extended_s64
; CHECK: liveins: $w0, $x1
- ; CHECK: %reg0:gpr32 = COPY $w0
- ; CHECK: %reg1:gpr64sp = COPY $x1
- ; CHECK: [[SUBSXrx:%[0-9]+]]:gpr64 = SUBSXrx %reg1, %reg0, 18, implicit-def $nzcv
- ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
- ; CHECK: $w0 = COPY %cmp
- ; CHECK: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %reg0:gpr32 = COPY $w0
+ ; CHECK-NEXT: %reg1:gpr64sp = COPY $x1
+ ; CHECK-NEXT: [[SUBSXrx:%[0-9]+]]:gpr64 = SUBSXrx %reg1, %reg0, 18, implicit-def $nzcv
+ ; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
+ ; CHECK-NEXT: $w0 = COPY %cmp
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%reg0:gpr(s32) = COPY $w0
%reg1:gpr(s64) = COPY $x1
%ext:gpr(s64) = G_ZEXT %reg0(s32)
@@ -221,14 +230,15 @@ body: |
; CHECK-LABEL: name: cmp_arith_extended_s32
; CHECK: liveins: $w0, $w1, $h0
- ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, $h0, %subreg.hsub
- ; CHECK: %reg0:gpr32all = COPY [[SUBREG_TO_REG]]
- ; CHECK: %reg1:gpr32sp = COPY $w1
- ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %reg0
- ; CHECK: [[SUBSWrx:%[0-9]+]]:gpr32 = SUBSWrx %reg1, [[COPY]], 10, implicit-def $nzcv
- ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
- ; CHECK: $w0 = COPY %cmp
- ; CHECK: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, $h0, %subreg.hsub
+ ; CHECK-NEXT: %reg0:gpr32all = COPY [[SUBREG_TO_REG]]
+ ; CHECK-NEXT: %reg1:gpr32sp = COPY $w1
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY %reg0
+ ; CHECK-NEXT: [[SUBSWrx:%[0-9]+]]:gpr32 = SUBSWrx %reg1, [[COPY]], 10, implicit-def $nzcv
+ ; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
+ ; CHECK-NEXT: $w0 = COPY %cmp
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%reg0:gpr(s16) = COPY $h0
%reg1:gpr(s32) = COPY $w1
%ext:gpr(s32) = G_ZEXT %reg0(s16)
@@ -252,14 +262,15 @@ body: |
; CHECK-LABEL: name: cmp_arith_extended_shl_too_large
; CHECK: liveins: $w0, $x1
- ; CHECK: %reg0:gpr32 = COPY $w0
- ; CHECK: %reg1:gpr64 = COPY $x1
- ; CHECK: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %reg0, 0
- ; CHECK: %ext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
- ; CHECK: [[SUBSXrs:%[0-9]+]]:gpr64 = SUBSXrs %reg1, %ext, 5, implicit-def $nzcv
- ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
- ; CHECK: $w0 = COPY %cmp
- ; CHECK: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %reg0:gpr32 = COPY $w0
+ ; CHECK-NEXT: %reg1:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %reg0, 0
+ ; CHECK-NEXT: %ext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+ ; CHECK-NEXT: [[SUBSXrs:%[0-9]+]]:gpr64 = SUBSXrs %reg1, %ext, 5, implicit-def $nzcv
+ ; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
+ ; CHECK-NEXT: $w0 = COPY %cmp
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%reg0:gpr(s32) = COPY $w0
%reg1:gpr(s64) = COPY $x1
%ext:gpr(s64) = G_ZEXT %reg0(s32)
@@ -284,13 +295,14 @@ body: |
; CHECK-LABEL: name: cmp_add_rhs
; CHECK: liveins: $w0, $w1, $w2
- ; CHECK: %cmp_lhs:gpr32 = COPY $w0
- ; CHECK: %cmp_rhs:gpr32 = COPY $w1
- ; CHECK: %add_rhs:gpr32 = COPY $w2
- ; CHECK: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
- ; CHECK: %add:gpr32 = CSINCWr %add_rhs, %add_rhs, 1, implicit $nzcv
- ; CHECK: $w0 = COPY %add
- ; CHECK: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cmp_lhs:gpr32 = COPY $w0
+ ; CHECK-NEXT: %cmp_rhs:gpr32 = COPY $w1
+ ; CHECK-NEXT: %add_rhs:gpr32 = COPY $w2
+ ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
+ ; CHECK-NEXT: %add:gpr32 = CSINCWr %add_rhs, %add_rhs, 1, implicit $nzcv
+ ; CHECK-NEXT: $w0 = COPY %add
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%cmp_lhs:gpr(s32) = COPY $w0
%cmp_rhs:gpr(s32) = COPY $w1
%add_rhs:gpr(s32) = COPY $w2
@@ -314,13 +326,14 @@ body: |
; CHECK-LABEL: name: cmp_add_lhs
; CHECK: liveins: $w0, $w1, $w2
- ; CHECK: %cmp_lhs:gpr32 = COPY $w0
- ; CHECK: %cmp_rhs:gpr32 = COPY $w1
- ; CHECK: %add_lhs:gpr32 = COPY $w2
- ; CHECK: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
- ; CHECK: %add:gpr32 = CSINCWr %add_lhs, %add_lhs, 1, implicit $nzcv
- ; CHECK: $w0 = COPY %add
- ; CHECK: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cmp_lhs:gpr32 = COPY $w0
+ ; CHECK-NEXT: %cmp_rhs:gpr32 = COPY $w1
+ ; CHECK-NEXT: %add_lhs:gpr32 = COPY $w2
+ ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
+ ; CHECK-NEXT: %add:gpr32 = CSINCWr %add_lhs, %add_lhs, 1, implicit $nzcv
+ ; CHECK-NEXT: $w0 = COPY %add
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%cmp_lhs:gpr(s32) = COPY $w0
%cmp_rhs:gpr(s32) = COPY $w1
%add_lhs:gpr(s32) = COPY $w2
@@ -344,13 +357,14 @@ body: |
; CHECK-LABEL: name: cmp_add_lhs_vector
; CHECK: liveins: $q0, $q1, $q2
- ; CHECK: %cmp_lhs:fpr128 = COPY $q0
- ; CHECK: %cmp_rhs:fpr128 = COPY $q1
- ; CHECK: %add_lhs:fpr128 = COPY $q2
- ; CHECK: [[CMEQv4i32_:%[0-9]+]]:fpr128 = CMEQv4i32 %cmp_lhs, %cmp_rhs
- ; CHECK: %add:fpr128 = ADDv4i32 %add_lhs, [[CMEQv4i32_]]
- ; CHECK: $q0 = COPY %add
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cmp_lhs:fpr128 = COPY $q0
+ ; CHECK-NEXT: %cmp_rhs:fpr128 = COPY $q1
+ ; CHECK-NEXT: %add_lhs:fpr128 = COPY $q2
+ ; CHECK-NEXT: [[CMEQv4i32_:%[0-9]+]]:fpr128 = CMEQv4i32 %cmp_lhs, %cmp_rhs
+ ; CHECK-NEXT: %add:fpr128 = ADDv4i32 %add_lhs, [[CMEQv4i32_]]
+ ; CHECK-NEXT: $q0 = COPY %add
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%cmp_lhs:fpr(<4 x s32>) = COPY $q0
%cmp_rhs:fpr(<4 x s32>) = COPY $q1
%add_lhs:fpr(<4 x s32>) = COPY $q2
@@ -358,3 +372,108 @@ body: |
%add:fpr(<4 x s32>) = G_ADD %add_lhs, %cmp
$q0 = COPY %add(<4 x s32>)
RET_ReallyLR implicit $q0
+
+...
+---
+name: cmp_add_rhs_64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+
+ ; The CSINC should use the add's RHS.
+ ; CHECK-LABEL: name: cmp_add_rhs_64
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cmp_lhs:gpr64 = COPY $x0
+ ; CHECK-NEXT: %cmp_rhs:gpr64 = COPY $x1
+ ; CHECK-NEXT: %add_rhs:gpr64 = COPY $x2
+ ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
+ ; CHECK-NEXT: %add:gpr64 = CSINCXr %add_rhs, %add_rhs, 1, implicit $nzcv
+ ; CHECK-NEXT: $x0 = COPY %add
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %cmp_lhs:gpr(s64) = COPY $x0
+ %cmp_rhs:gpr(s64) = COPY $x1
+ %add_rhs:gpr(s64) = COPY $x2
+ %cmp:gpr(s32) = G_ICMP intpred(eq), %cmp_lhs(s64), %cmp_rhs
+ %cmp_ext:gpr(s64) = G_ZEXT %cmp
+ %add:gpr(s64) = G_ADD %cmp_ext, %add_rhs
+ $x0 = COPY %add(s64)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: cmp_add_rhs_64_zext_multi_use
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+
+ ; The ZExt is used more than once so don't fold.
+ ; CHECK-LABEL: name: cmp_add_rhs_64_zext_multi_use
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cmp_lhs:gpr64 = COPY $x0
+ ; CHECK-NEXT: %cmp_rhs:gpr64 = COPY $x1
+ ; CHECK-NEXT: %add_rhs:gpr64 = COPY $x2
+ ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
+ ; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+ ; CHECK-NEXT: %cmp_ext:gpr64 = SUBREG_TO_REG 0, %cmp, %subreg.sub_32
+ ; CHECK-NEXT: %add:gpr64 = ADDXrr %cmp_ext, %add_rhs
+ ; CHECK-NEXT: %or:gpr64 = ORRXrr %add, %cmp_ext
+ ; CHECK-NEXT: $x0 = COPY %or
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %cmp_lhs:gpr(s64) = COPY $x0
+ %cmp_rhs:gpr(s64) = COPY $x1
+ %add_rhs:gpr(s64) = COPY $x2
+ %cmp:gpr(s32) = G_ICMP intpred(eq), %cmp_lhs(s64), %cmp_rhs
+ %cmp_ext:gpr(s64) = G_ZEXT %cmp
+ %add:gpr(s64) = G_ADD %cmp_ext, %add_rhs
+ %or:gpr(s64) = G_OR %add, %cmp_ext
+ $x0 = COPY %or(s64)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: cmp_add_rhs_64_cmp_multi_use
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+
+ ; The cmp is used more than once so don't fold.
+ ; CHECK-LABEL: name: cmp_add_rhs_64_cmp_multi_use
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cmp_lhs:gpr64 = COPY $x0
+ ; CHECK-NEXT: %cmp_rhs:gpr64 = COPY $x1
+ ; CHECK-NEXT: %add_rhs:gpr64 = COPY $x2
+ ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
+ ; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+ ; CHECK-NEXT: %cmp_ext:gpr64 = SUBREG_TO_REG 0, %cmp, %subreg.sub_32
+ ; CHECK-NEXT: %add:gpr64 = ADDXrr %cmp_ext, %add_rhs
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], %cmp, %subreg.sub_32
+ ; CHECK-NEXT: %cmp_ext2:gpr64 = SBFMXri [[INSERT_SUBREG]], 0, 31
+ ; CHECK-NEXT: %or:gpr64 = ORRXrr %add, %cmp_ext2
+ ; CHECK-NEXT: $x0 = COPY %or
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %cmp_lhs:gpr(s64) = COPY $x0
+ %cmp_rhs:gpr(s64) = COPY $x1
+ %add_rhs:gpr(s64) = COPY $x2
+ %cmp:gpr(s32) = G_ICMP intpred(eq), %cmp_lhs(s64), %cmp_rhs
+ %cmp_ext:gpr(s64) = G_ZEXT %cmp
+ %add:gpr(s64) = G_ADD %cmp_ext, %add_rhs
+ %cmp_ext2:gpr(s64) = G_SEXT %cmp
+ %or:gpr(s64) = G_OR %add, %cmp_ext2
+ $x0 = COPY %or(s64)
+ RET_ReallyLR implicit $x0