[llvm] 47d0780 - [GlobalISel] Handle more types in narrowScalar for eq/ne G_ICMP

Jessica Paquette via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 12 22:19:11 PDT 2021


Author: Jessica Paquette
Date: 2021-07-12T22:18:50-07:00
New Revision: 47d0780f454d3f7c42bdba13c0682bf2a0095bca

URL: https://github.com/llvm/llvm-project/commit/47d0780f454d3f7c42bdba13c0682bf2a0095bca
DIFF: https://github.com/llvm/llvm-project/commit/47d0780f454d3f7c42bdba13c0682bf2a0095bca.diff

LOG: [GlobalISel] Handle more types in narrowScalar for eq/ne G_ICMP

Generalize the existing eq/ne case using `extractParts`. The original code only
handled narrowing when the source type was exactly twice as wide as the narrow
type (2n -> n). This generalization allows for any source type that
`extractParts` can break down into narrow-sized parts plus leftover parts.

The general approach is:

- Loop over each narrow-sized part and do exactly what the 2-register case did
  (XOR the LHS part with the RHS part).
- Loop over the leftover-sized parts and do the same thing.
- Widen the leftover-sized XOR results to the desired narrow size, padding
  with zeros.
- OR all of the XOR results together and then compare the result against 0
  (just like the old code).

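This shows up a lot when building clang for AArch64 using GlobalISel, so it's
worth fixing. For the sake of simplicity, this doesn't handle the non-eq/ne
case yet: for those predicates, narrowing is still only supported when the
source type is exactly twice as wide as the narrow type.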

Also remove the code in this case that notifies the observer; we're going to
delete MI anyway, so notifying the observer shouldn't be necessary.

Differential Revision: https://reviews.llvm.org/D105161

Added: 
    

Modified: 
    llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 40f6f4a5ba23..b31cf3f38624 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1063,38 +1063,81 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
     return Legalized;
   }
   case TargetOpcode::G_ICMP: {
-    uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
-    if (NarrowSize * 2 != SrcSize)
+    Register LHS = MI.getOperand(2).getReg();
+    LLT SrcTy = MRI.getType(LHS);
+    uint64_t SrcSize = SrcTy.getSizeInBits();
+    CmpInst::Predicate Pred =
+        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+
+    // TODO: Handle the non-equality case for weird sizes.
+    if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
       return UnableToLegalize;
 
-    Observer.changingInstr(MI);
-    Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
-    Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
-    MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2));
+    LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
+    SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
+    if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
+                      LHSLeftoverRegs))
+      return UnableToLegalize;
+
+    LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
+    SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
+    if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
+                      RHSPartRegs, RHSLeftoverRegs))
+      return UnableToLegalize;
 
-    Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
-    Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
-    MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3));
+    // We now have the LHS and RHS of the compare split into narrow-type
+    // registers, plus potentially some leftover type.
+    Register Dst = MI.getOperand(0).getReg();
+    LLT ResTy = MRI.getType(Dst);
+    if (ICmpInst::isEquality(Pred)) {
+      // For each part on the LHS and RHS, keep track of the result of XOR-ing
+      // them together. For each equal part, the result should be all 0s. For
+      // each non-equal part, we'll get at least one 1.
+      auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
+      SmallVector<Register, 4> Xors;
+      for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
+        auto LHS = std::get<0>(LHSAndRHS);
+        auto RHS = std::get<1>(LHSAndRHS);
+        auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
+        Xors.push_back(Xor);
+      }
 
-    CmpInst::Predicate Pred =
-        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
-    LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
-
-    if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
-      MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
-      MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
-      MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
-      MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
-      MIRBuilder.buildICmp(Pred, MI.getOperand(0), Or, Zero);
+      // Build a G_XOR for each leftover register. Each G_XOR must be widened
+      // to the desired narrow type so that we can OR them together later.
+      SmallVector<Register, 4> WidenedXors;
+      for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
+        auto LHS = std::get<0>(LHSAndRHS);
+        auto RHS = std::get<1>(LHSAndRHS);
+        auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
+        LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
+        buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
+                            /* PadStrategy = */ TargetOpcode::G_ZEXT);
+        Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
+      }
+
+      // Now, for each part we broke up, we know if they are equal/not equal
+      // based off the G_XOR. We can OR these all together and compare against
+      // 0 to get the result.
+      assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
+      auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
+      for (unsigned I = 2, E = Xors.size(); I < E; ++I)
+        Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
+      MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
     } else {
+      // TODO: Handle non-power-of-two types.
+      assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
+      assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
+      Register LHSL = LHSPartRegs[0];
+      Register LHSH = LHSPartRegs[1];
+      Register RHSL = RHSPartRegs[0];
+      Register RHSH = RHSPartRegs[1];
       MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
       MachineInstrBuilder CmpHEQ =
           MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
       MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
           ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
-      MIRBuilder.buildSelect(MI.getOperand(0), CmpHEQ, CmpLU, CmpH);
+      MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
     }
-    Observer.changedInstr(MI);
     MI.eraseFromParent();
     return Legalized;
   }

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
index d4c3d437d0ae..8a9bf798b578 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
@@ -79,3 +79,169 @@ body:             |
     RET_ReallyLR
 
 ...
+---
+name:            test_s128_eq
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: test_s128_eq
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK:   [[XOR:%[0-9]+]]:_(s64) = G_XOR [[DEF]], [[DEF]]
+  ; CHECK:   [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[DEF]], [[DEF]]
+  ; CHECK:   [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[XOR1]]
+  ; CHECK:   [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s64), [[C]]
+  ; CHECK:   %cmp:_(s1) = G_TRUNC [[ICMP]](s32)
+  ; CHECK:   G_BRCOND %cmp(s1), %bb.1
+  ; CHECK:   G_BR %bb.2
+  ; CHECK: bb.1:
+  ; CHECK:   successors:
+  ; CHECK: bb.2:
+  ; CHECK:   RET_ReallyLR
+  bb.1:
+    %lhs:_(s128) = G_IMPLICIT_DEF
+    %rhs:_(s128) = G_IMPLICIT_DEF
+    %cmp:_(s1) = G_ICMP intpred(eq), %lhs(s128), %rhs
+    G_BRCOND %cmp(s1), %bb.2
+    G_BR %bb.3
+  bb.2:
+    successors:
+  bb.3:
+    RET_ReallyLR
+
+...
+---
+name:            test_s88_eq
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: test_s88_eq
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+  ; CHECK:   [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF]](s64)
+  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8), [[UV2]](s8), [[UV3]](s8), [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8)
+  ; CHECK:   [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF]](s64)
+  ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8), [[UV12]](s8), [[UV13]](s8), [[UV14]](s8), [[UV15]](s8)
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64)
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s64) = COPY [[MV1]](s64)
+  ; CHECK:   [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64)
+  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK:   [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[COPY1]]
+  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
+  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32)
+  ; CHECK:   [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[COPY3]]
+  ; CHECK:   [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[XOR1]](s32)
+  ; CHECK:   [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
+  ; CHECK:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8)
+  ; CHECK:   [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[MV2]]
+  ; CHECK:   [[OR1:%[0-9]+]]:_(s64) = G_OR [[OR]], [[C]]
+  ; CHECK:   [[OR2:%[0-9]+]]:_(s64) = G_OR [[OR1]], [[C]]
+  ; CHECK:   [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR2]](s64), [[C]]
+  ; CHECK:   %cmp:_(s1) = G_TRUNC [[ICMP]](s32)
+  ; CHECK:   G_BRCOND %cmp(s1), %bb.1
+  ; CHECK:   G_BR %bb.2
+  ; CHECK: bb.1:
+  ; CHECK:   successors:
+  ; CHECK: bb.2:
+  ; CHECK:   RET_ReallyLR
+  bb.1:
+    %lhs:_(s88) = G_IMPLICIT_DEF
+    %rhs:_(s88) = G_IMPLICIT_DEF
+    %cmp:_(s1) = G_ICMP intpred(eq), %lhs(s88), %rhs
+    G_BRCOND %cmp(s1), %bb.2
+    G_BR %bb.3
+  bb.2:
+    successors:
+  bb.3:
+    RET_ReallyLR
+
+...
+---
+name:            test_s88_ne
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: test_s88_ne
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+  ; CHECK:   [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF]](s64)
+  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8), [[UV2]](s8), [[UV3]](s8), [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8)
+  ; CHECK:   [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF]](s64)
+  ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8), [[UV12]](s8), [[UV13]](s8), [[UV14]](s8), [[UV15]](s8)
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64)
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s64) = COPY [[MV1]](s64)
+  ; CHECK:   [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64)
+  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK:   [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[COPY1]]
+  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
+  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32)
+  ; CHECK:   [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[COPY3]]
+  ; CHECK:   [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[XOR1]](s32)
+  ; CHECK:   [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
+  ; CHECK:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8)
+  ; CHECK:   [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[MV2]]
+  ; CHECK:   [[OR1:%[0-9]+]]:_(s64) = G_OR [[OR]], [[C]]
+  ; CHECK:   [[OR2:%[0-9]+]]:_(s64) = G_OR [[OR1]], [[C]]
+  ; CHECK:   [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[OR2]](s64), [[C]]
+  ; CHECK:   %cmp:_(s1) = G_TRUNC [[ICMP]](s32)
+  ; CHECK:   G_BRCOND %cmp(s1), %bb.1
+  ; CHECK:   G_BR %bb.2
+  ; CHECK: bb.1:
+  ; CHECK:   successors:
+  ; CHECK: bb.2:
+  ; CHECK:   RET_ReallyLR
+  bb.1:
+    %lhs:_(s88) = G_IMPLICIT_DEF
+    %rhs:_(s88) = G_IMPLICIT_DEF
+    %cmp:_(s1) = G_ICMP intpred(ne), %lhs(s88), %rhs
+    G_BRCOND %cmp(s1), %bb.2
+    G_BR %bb.3
+  bb.2:
+    successors:
+  bb.3:
+    RET_ReallyLR
+
+...
+---
+name:            test_s96_eq
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: test_s96_eq
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64)
+  ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
+  ; CHECK:   [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64)
+  ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[UV3]](s32)
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+  ; CHECK:   [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s64), 0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s64) = COPY [[MV1]](s64)
+  ; CHECK:   [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s64), 0
+  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK:   [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[COPY1]]
+  ; CHECK:   [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[EXTRACT]], [[EXTRACT1]]
+  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[XOR1]](s32), [[C1]](s32)
+  ; CHECK:   [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[MV2]]
+  ; CHECK:   [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s64), [[C]]
+  ; CHECK:   %cmp:_(s1) = G_TRUNC [[ICMP]](s32)
+  ; CHECK:   G_BRCOND %cmp(s1), %bb.1
+  ; CHECK:   G_BR %bb.2
+  ; CHECK: bb.1:
+  ; CHECK:   successors:
+  ; CHECK: bb.2:
+  ; CHECK:   RET_ReallyLR
+  bb.1:
+    %lhs:_(s96) = G_IMPLICIT_DEF
+    %rhs:_(s96) = G_IMPLICIT_DEF
+    %cmp:_(s1) = G_ICMP intpred(eq), %lhs(s96), %rhs
+    G_BRCOND %cmp(s1), %bb.2
+    G_BR %bb.3
+  bb.2:
+    successors:
+  bb.3:
+    RET_ReallyLR


        


More information about the llvm-commits mailing list