[llvm] 47d0780 - [GlobalISel] Handle more types in narrowScalar for eq/ne G_ICMP
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 12 22:19:11 PDT 2021
Author: Jessica Paquette
Date: 2021-07-12T22:18:50-07:00
New Revision: 47d0780f454d3f7c42bdba13c0682bf2a0095bca
URL: https://github.com/llvm/llvm-project/commit/47d0780f454d3f7c42bdba13c0682bf2a0095bca
DIFF: https://github.com/llvm/llvm-project/commit/47d0780f454d3f7c42bdba13c0682bf2a0095bca.diff
LOG: [GlobalISel] Handle more types in narrowScalar for eq/ne G_ICMP
Generalize the existing eq/ne case using `extractParts`. The original code
only handled narrowing types of width 2n down to width n. This generalization
allows any type that can be broken down by `extractParts`.
The general overview is (a standalone sketch follows the list):
- Loop over each narrow-sized part and do exactly what the 2-register case did.
- Loop over the leftover-sized parts and do the same thing.
- Widen the leftover-sized XOR results to the desired narrow size.
- OR all of that together, then compare against 0 (just like the old code).
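As a rough illustration (not the LLVM code itself), here is the decomposition
for a hypothetical s88 compare split into an s64 part plus an s24 leftover,
written as plain C++; the function name, parameter layout, and masking are
assumptions made to keep the sketch self-contained:

  #include <cstdint>

  // Equality of two 88-bit values, each held as a 64-bit part plus a
  // 24-bit leftover stored in the low bits of a uint32_t.
  bool eq88(uint64_t LHSPart, uint32_t LHSLeftover,
            uint64_t RHSPart, uint32_t RHSLeftover) {
    uint64_t XorPart = LHSPart ^ RHSPart;  // narrow-sized part
    // Widen the leftover XOR to the narrow size, zero-padding the top bits.
    uint64_t XorLeftover = uint64_t((LHSLeftover ^ RHSLeftover) & 0xFFFFFFu);
    // OR everything together; equal inputs yield 0. For ne, compare != 0.
    return (XorPart | XorLeftover) == 0;
  }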
This shows up a lot when building clang for AArch64 using GlobalISel, so it's
worth fixing. For the sake of simplicity, this doesn't handle the non-eq/ne
case yet.
Also remove the observer-notification code in this case; we're just going to
delete MI anyway, so talking to the observer shouldn't be necessary.
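For context, the in-place-mutation pattern that those removed calls support
looks roughly like this (a sketch against the GISelChangeObserver API; the
operand index and register name are made up):

  // When an instruction is mutated in place, the observer must be told:
  Observer.changingInstr(MI);
  MI.getOperand(2).setReg(NewReg);
  Observer.changedInstr(MI);

  // Here, MI is instead replaced by freshly built instructions and erased,
  // so no changing/changed bracket is needed:
  MI.eraseFromParent();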
Differential Revision: https://reviews.llvm.org/D105161
Added:
Modified:
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 40f6f4a5ba23..b31cf3f38624 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1063,38 +1063,81 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
return Legalized;
}
case TargetOpcode::G_ICMP: {
- uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
- if (NarrowSize * 2 != SrcSize)
+ Register LHS = MI.getOperand(2).getReg();
+ LLT SrcTy = MRI.getType(LHS);
+ uint64_t SrcSize = SrcTy.getSizeInBits();
+ CmpInst::Predicate Pred =
+ static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+
+ // TODO: Handle the non-equality case for weird sizes.
+ if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
return UnableToLegalize;
- Observer.changingInstr(MI);
- Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
- Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2));
+ LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
+ SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
+ if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
+ LHSLeftoverRegs))
+ return UnableToLegalize;
+
+ LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
+ SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
+ if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
+ RHSPartRegs, RHSLeftoverRegs))
+ return UnableToLegalize;
- Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
- Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3));
+ // We now have the LHS and RHS of the compare split into narrow-type
+ // registers, plus potentially some leftover type.
+ Register Dst = MI.getOperand(0).getReg();
+ LLT ResTy = MRI.getType(Dst);
+ if (ICmpInst::isEquality(Pred)) {
+ // For each part on the LHS and RHS, keep track of the result of XOR-ing
+ // them together. For each equal part, the result should be all 0s. For
+ // each non-equal part, we'll get at least one 1.
+ auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
+ SmallVector<Register, 4> Xors;
+ for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
+ auto LHS = std::get<0>(LHSAndRHS);
+ auto RHS = std::get<1>(LHSAndRHS);
+ auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
+ Xors.push_back(Xor);
+ }
- CmpInst::Predicate Pred =
- static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
- LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
-
- if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
- MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
- MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
- MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
- MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
- MIRBuilder.buildICmp(Pred, MI.getOperand(0), Or, Zero);
+ // Build a G_XOR for each leftover register. Each G_XOR must be widened
+ // to the desired narrow type so that we can OR them together later.
+ SmallVector<Register, 4> WidenedXors;
+ for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
+ auto LHS = std::get<0>(LHSAndRHS);
+ auto RHS = std::get<1>(LHSAndRHS);
+ auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
+ LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
+ buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
+ /* PadStrategy = */ TargetOpcode::G_ZEXT);
+ Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
+ }
+
+ // Now, for each part we broke up, we know if they are equal/not equal
+ // based off the G_XOR. We can OR these all together and compare against
+ // 0 to get the result.
+ assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
+ auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
+ for (unsigned I = 2, E = Xors.size(); I < E; ++I)
+ Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
+ MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
} else {
+ // TODO: Handle non-power-of-two types.
+ assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
+ assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
+ Register LHSL = LHSPartRegs[0];
+ Register LHSH = LHSPartRegs[1];
+ Register RHSL = RHSPartRegs[0];
+ Register RHSH = RHSPartRegs[1];
MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
MachineInstrBuilder CmpHEQ =
MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
- MIRBuilder.buildSelect(MI.getOperand(0), CmpHEQ, CmpLU, CmpH);
+ MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
}
- Observer.changedInstr(MI);
MI.eraseFromParent();
return Legalized;
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
index d4c3d437d0ae..8a9bf798b578 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
@@ -79,3 +79,169 @@ body: |
RET_ReallyLR
...
+---
+name: test_s128_eq
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: test_s128_eq
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[DEF]], [[DEF]]
+ ; CHECK: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[DEF]], [[DEF]]
+ ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[XOR1]]
+ ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s64), [[C]]
+ ; CHECK: %cmp:_(s1) = G_TRUNC [[ICMP]](s32)
+ ; CHECK: G_BRCOND %cmp(s1), %bb.1
+ ; CHECK: G_BR %bb.2
+ ; CHECK: bb.1:
+ ; CHECK: successors:
+ ; CHECK: bb.2:
+ ; CHECK: RET_ReallyLR
+ bb.1:
+ %lhs:_(s128) = G_IMPLICIT_DEF
+ %rhs:_(s128) = G_IMPLICIT_DEF
+ %cmp:_(s1) = G_ICMP intpred(eq), %lhs(s128), %rhs
+ G_BRCOND %cmp(s1), %bb.2
+ G_BR %bb.3
+ bb.2:
+ successors:
+ bb.3:
+ RET_ReallyLR
+
+...
+---
+name: test_s88_eq
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: test_s88_eq
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF]](s64)
+ ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8), [[UV2]](s8), [[UV3]](s8), [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8)
+ ; CHECK: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF]](s64)
+ ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8), [[UV12]](s8), [[UV13]](s8), [[UV14]](s8), [[UV15]](s8)
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64)
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[MV1]](s64)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64)
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[COPY1]]
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32)
+ ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[COPY3]]
+ ; CHECK: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[XOR1]](s32)
+ ; CHECK: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
+ ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8)
+ ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[MV2]]
+ ; CHECK: [[OR1:%[0-9]+]]:_(s64) = G_OR [[OR]], [[C]]
+ ; CHECK: [[OR2:%[0-9]+]]:_(s64) = G_OR [[OR1]], [[C]]
+ ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR2]](s64), [[C]]
+ ; CHECK: %cmp:_(s1) = G_TRUNC [[ICMP]](s32)
+ ; CHECK: G_BRCOND %cmp(s1), %bb.1
+ ; CHECK: G_BR %bb.2
+ ; CHECK: bb.1:
+ ; CHECK: successors:
+ ; CHECK: bb.2:
+ ; CHECK: RET_ReallyLR
+ bb.1:
+ %lhs:_(s88) = G_IMPLICIT_DEF
+ %rhs:_(s88) = G_IMPLICIT_DEF
+ %cmp:_(s1) = G_ICMP intpred(eq), %lhs(s88), %rhs
+ G_BRCOND %cmp(s1), %bb.2
+ G_BR %bb.3
+ bb.2:
+ successors:
+ bb.3:
+ RET_ReallyLR
+
+...
+---
+name: test_s88_ne
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: test_s88_ne
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF]](s64)
+ ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8), [[UV2]](s8), [[UV3]](s8), [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8)
+ ; CHECK: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF]](s64)
+ ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8), [[UV12]](s8), [[UV13]](s8), [[UV14]](s8), [[UV15]](s8)
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64)
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[MV1]](s64)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64)
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[COPY1]]
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32)
+ ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[COPY3]]
+ ; CHECK: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[XOR1]](s32)
+ ; CHECK: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
+ ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8)
+ ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[MV2]]
+ ; CHECK: [[OR1:%[0-9]+]]:_(s64) = G_OR [[OR]], [[C]]
+ ; CHECK: [[OR2:%[0-9]+]]:_(s64) = G_OR [[OR1]], [[C]]
+ ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[OR2]](s64), [[C]]
+ ; CHECK: %cmp:_(s1) = G_TRUNC [[ICMP]](s32)
+ ; CHECK: G_BRCOND %cmp(s1), %bb.1
+ ; CHECK: G_BR %bb.2
+ ; CHECK: bb.1:
+ ; CHECK: successors:
+ ; CHECK: bb.2:
+ ; CHECK: RET_ReallyLR
+ bb.1:
+ %lhs:_(s88) = G_IMPLICIT_DEF
+ %rhs:_(s88) = G_IMPLICIT_DEF
+ %cmp:_(s1) = G_ICMP intpred(ne), %lhs(s88), %rhs
+ G_BRCOND %cmp(s1), %bb.2
+ G_BR %bb.3
+ bb.2:
+ successors:
+ bb.3:
+ RET_ReallyLR
+
+...
+---
+name: test_s96_eq
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: test_s96_eq
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64)
+ ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
+ ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64)
+ ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[UV3]](s32)
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+ ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s64), 0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[MV1]](s64)
+ ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s64), 0
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[COPY1]]
+ ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[EXTRACT]], [[EXTRACT1]]
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[XOR1]](s32), [[C1]](s32)
+ ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[MV2]]
+ ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s64), [[C]]
+ ; CHECK: %cmp:_(s1) = G_TRUNC [[ICMP]](s32)
+ ; CHECK: G_BRCOND %cmp(s1), %bb.1
+ ; CHECK: G_BR %bb.2
+ ; CHECK: bb.1:
+ ; CHECK: successors:
+ ; CHECK: bb.2:
+ ; CHECK: RET_ReallyLR
+ bb.1:
+ %lhs:_(s96) = G_IMPLICIT_DEF
+ %rhs:_(s96) = G_IMPLICIT_DEF
+ %cmp:_(s1) = G_ICMP intpred(eq), %lhs(s96), %rhs
+ G_BRCOND %cmp(s1), %bb.2
+ G_BR %bb.3
+ bb.2:
+ successors:
+ bb.3:
+ RET_ReallyLR