[llvm] ae597a7 - [AArch64][GlobalISel] Do not modify predicate when optimizing G_ICMP
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Tue May 26 17:51:51 PDT 2020
Author: Jessica Paquette
Date: 2020-05-26T17:51:08-07:00
New Revision: ae597a771ed4d7530e2ef232d02a253067e3312f
URL: https://github.com/llvm/llvm-project/commit/ae597a771ed4d7530e2ef232d02a253067e3312f
DIFF: https://github.com/llvm/llvm-project/commit/ae597a771ed4d7530e2ef232d02a253067e3312f.diff
LOG: [AArch64][GlobalISel] Do not modify predicate when optimizing G_ICMP
This fixes a bug in `tryOptArithImmedIntegerCompare`.
It is unsafe to update the predicate on a MachineOperand when optimizing a
G_ICMP, because the G_ICMP may be used in more than one place.
For example, when we are optimizing G_SELECT, we allow compares which are used
in more than one G_SELECT. If we modify the G_ICMP, then we'll break one of
the G_SELECTs.
Since the compare is being produced to either
1) Select a G_ICMP
2) Fold a G_ICMP into an instruction when profitable
there's no reason to actually modify the original G_ICMP. Any predicate change
only needs to apply to the specific compare being emitted.
Instead, pass a `CmpInst::Predicate` by reference to
`tryOptArithImmedIntegerCompare`, so that the optimization updates a local copy
of the predicate rather than the shared MachineOperand.
Differential Revision: https://reviews.llvm.org/D80585
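For illustration, here is a standalone C++ sketch of that pattern (hypothetical
names like `tryOptImmedCompare` and a simplified 12-bit immediate check; the
real logic is `tryOptArithImmedIntegerCompare` in the diff below). Only the
local predicate copy is rewritten, and only once the adjusted immediate is
known to be usable, so a compare shared by several G_SELECTs is never
corrupted:

// Standalone sketch (not the real LLVM code) of the pattern this patch adopts:
// the caller copies the predicate out of the shared operand, and the helper
// mutates only that local copy, so other users of the same compare are safe.
#include <cstdint>
#include <iostream>
#include <optional>

enum class Pred { SLT, SLE, SGT, SGE }; // stand-in for CmpInst::Predicate

// Hypothetical analogue of tryOptArithImmedIntegerCompare: it may adjust the
// immediate and the predicate together, but only through the reference it was
// given, never by writing back to a (possibly shared) G_ICMP operand.
std::optional<int64_t> tryOptImmedCompare(int64_t C, Pred &P) {
  auto Encodable = [](int64_t V) { return V >= 0 && V < 4096; };
  if (Encodable(C))
    return C;                       // already fits: predicate left untouched
  Pred NewP;
  switch (P) {
  case Pred::SLT:                   // x <  c  is  x <= c-1  (c != INT64_MIN)
    if (C == INT64_MIN) return std::nullopt;
    NewP = Pred::SLE; C -= 1; break;
  case Pred::SLE:                   // x <= c  is  x <  c+1  (c != INT64_MAX)
    if (C == INT64_MAX) return std::nullopt;
    NewP = Pred::SLT; C += 1; break;
  default:
    return std::nullopt;            // no rewrite known: predicate untouched
  }
  if (!Encodable(C))
    return std::nullopt;            // still does not fit: predicate untouched
  P = NewP;                         // commit only once the rewrite is usable
  return C;
}

int main() {
  Pred Shared = Pred::SLT;          // imagine this backs a G_ICMP with two users
  Pred Local = Shared;              // each selection works on its own copy
  if (auto Imm = tryOptImmedCompare(4096, Local))
    std::cout << "emit compare against #" << *Imm << " using the local copy\n";
  // Shared is still SLT here, so a second G_SELECT that reuses the same
  // G_ICMP sees the original predicate.
  std::cout << (Shared == Pred::SLT) << "\n";
}

In the actual patch, emitIntegerCompare makes exactly this kind of local copy
(`CmpInst::Predicate P`) from the predicate operand and returns the predicate
it ultimately used alongside the compare instruction.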
Added:
Modified:
llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
llvm/test/CodeGen/AArch64/GlobalISel/select-arith-immed-compare.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 57eaf140a638..1b321260ed02 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -155,7 +155,9 @@ class AArch64InstructionSelector : public InstructionSelector {
// Emit an integer compare between LHS and RHS, which checks for Predicate.
//
- // This may update Predicate when emitting the compare.
+ // This returns the produced compare instruction, and the predicate which
+ // was ultimately used in the compare. The predicate may differ from what
+ // is passed in \p Predicate due to optimization.
std::pair<MachineInstr *, CmpInst::Predicate>
emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
MachineOperand &Predicate,
@@ -307,7 +309,7 @@ class AArch64InstructionSelector : public InstructionSelector {
MachineIRBuilder &MIRBuilder) const;
MachineInstr *tryOptArithImmedIntegerCompare(MachineOperand &LHS,
MachineOperand &RHS,
- MachineOperand &Predicate,
+ CmpInst::Predicate &Predicate,
MachineIRBuilder &MIB) const;
MachineInstr *tryOptArithShiftedCompare(MachineOperand &LHS,
MachineOperand &RHS,
@@ -3685,13 +3687,16 @@ AArch64InstructionSelector::emitIntegerCompare(
MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
MachineIRBuilder &MIRBuilder) const {
assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
+ assert(Predicate.isPredicate() && "Expected predicate?");
MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
+ CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
+
// Fold the compare if possible.
MachineInstr *FoldCmp =
tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
if (FoldCmp)
- return {FoldCmp, (CmpInst::Predicate)Predicate.getPredicate()};
+ return {FoldCmp, P};
// Can't fold into a CMN. Just emit a normal compare.
unsigned CmpOpc = 0;
@@ -3712,21 +3717,21 @@ AArch64InstructionSelector::emitIntegerCompare(
// Try to match immediate forms.
MachineInstr *ImmedCmp =
- tryOptArithImmedIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
+ tryOptArithImmedIntegerCompare(LHS, RHS, P, MIRBuilder);
if (ImmedCmp)
- return {ImmedCmp, (CmpInst::Predicate)Predicate.getPredicate()};
+ return {ImmedCmp, P};
// If we don't have an immediate, we may have a shift which can be folded
// into the compare.
MachineInstr *ShiftedCmp = tryOptArithShiftedCompare(LHS, RHS, MIRBuilder);
if (ShiftedCmp)
- return {ShiftedCmp, (CmpInst::Predicate)Predicate.getPredicate()};
+ return {ShiftedCmp, P};
auto CmpMI =
MIRBuilder.buildInstr(CmpOpc, {ZReg}, {LHS.getReg(), RHS.getReg()});
// Make sure that we can constrain the compare that we emitted.
constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
- return {&*CmpMI, (CmpInst::Predicate)Predicate.getPredicate()};
+ return {&*CmpMI, P};
}
MachineInstr *AArch64InstructionSelector::emitVectorConcat(
@@ -4042,7 +4047,7 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
}
MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare(
- MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
+ MachineOperand &LHS, MachineOperand &RHS, CmpInst::Predicate &P,
MachineIRBuilder &MIB) const {
// Attempt to select the immediate form of an integer compare.
MachineRegisterInfo &MRI = *MIB.getMRI();
@@ -4051,7 +4056,6 @@ MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare(
unsigned Size = Ty.getSizeInBits();
assert((Size == 32 || Size == 64) &&
"Expected 32 bit or 64 bit compare only?");
- auto P = (CmpInst::Predicate)Predicate.getPredicate();
// Check if this is a case we can already handle.
InstructionSelector::ComplexRendererFns ImmFns;
@@ -4066,6 +4070,7 @@ MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare(
// We have a constant, but it doesn't fit. Try adjusting it by one and
// updating the predicate if possible.
uint64_t C = *MaybeImmed;
+ CmpInst::Predicate NewP;
switch (P) {
default:
return nullptr;
@@ -4080,7 +4085,7 @@ MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare(
if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
(Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
return nullptr;
- P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
+ NewP = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
C -= 1;
break;
case CmpInst::ICMP_ULT:
@@ -4093,7 +4098,7 @@ MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare(
// When c is not zero.
if (C == 0)
return nullptr;
- P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
+ NewP = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
C -= 1;
break;
case CmpInst::ICMP_SLE:
@@ -4107,7 +4112,7 @@ MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare(
if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
(Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
return nullptr;
- P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
+ NewP = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
C += 1;
break;
case CmpInst::ICMP_ULE:
@@ -4121,7 +4126,7 @@ MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare(
if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
(Size == 64 && C == UINT64_MAX))
return nullptr;
- P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
+ NewP = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
C += 1;
break;
}
@@ -4132,7 +4137,7 @@ MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare(
ImmFns = select12BitValueWithLeftShift(C);
if (!ImmFns)
return nullptr;
- Predicate.setPredicate(P);
+ P = NewP;
}
// At this point, we know we can select an immediate form. Go ahead and do
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-arith-immed-compare.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-arith-immed-compare.mir
index 59fcbd09c4c1..37d7ec60f553 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-arith-immed-compare.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-arith-immed-compare.mir
@@ -627,4 +627,82 @@ body: |
%3:gpr(s64) = G_AND %6, %5
$x0 = COPY %3(s64)
RET_ReallyLR implicit $x0
+
+...
+---
+name: more_than_one_use_select
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+
+ ; Both of these selects use the same compare.
+ ;
+ ; They should both be optimized in the same way, so the SUBS produced for
+ ; each CSEL should be the same.
+
+ ; CHECK-LABEL: name: more_than_one_use_select
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: %a:gpr64common = COPY $x0
+ ; CHECK: %b:gpr64 = COPY $x1
+ ; CHECK: %c:gpr64 = COPY $x2
+ ; CHECK: $xzr = SUBSXri %a, 0, 0, implicit-def $nzcv
+ ; CHECK: %select1:gpr64 = CSELXr %a, %b, 11, implicit $nzcv
+ ; CHECK: $xzr = SUBSXri %a, 0, 0, implicit-def $nzcv
+ ; CHECK: %select2:gpr64 = CSELXr %b, %c, 11, implicit $nzcv
+ ; CHECK: %add:gpr64 = ADDXrr %select1, %select2
+ ; CHECK: $x0 = COPY %add
+ ; CHECK: RET_ReallyLR implicit $x0
+ %a:gpr(s64) = COPY $x0
+ %b:gpr(s64) = COPY $x1
+ %c:gpr(s64) = COPY $x2
+ %cst:gpr(s64) = G_CONSTANT i64 -1
+ %cmp:gpr(s32) = G_ICMP intpred(sle), %a(s64), %cst
+ %trunc_cmp:gpr(s1) = G_TRUNC %cmp(s32)
+ %select1:gpr(s64) = G_SELECT %trunc_cmp(s1), %a, %b
+ %select2:gpr(s64) = G_SELECT %trunc_cmp(s1), %b, %c
+ %add:gpr(s64) = G_ADD %select1, %select2
+ $x0 = COPY %add(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: more_than_one_use_select_no_opt
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+
+ ; When we don't end up doing the optimization, we should not change the
+ ; predicate.
+ ;
+ ; In this case, the CSELXrs should both have predicate code 13.
+
+ ; CHECK-LABEL: name: more_than_one_use_select_no_opt
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: %a:gpr64 = COPY $x0
+ ; CHECK: %b:gpr64 = COPY $x1
+ ; CHECK: %c:gpr64 = COPY $x2
+ ; CHECK: %cst:gpr64 = MOVi64imm 922337203685477580
+ ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %a, %cst, implicit-def $nzcv
+ ; CHECK: %select1:gpr64 = CSELXr %a, %b, 13, implicit $nzcv
+ ; CHECK: [[SUBSXrr1:%[0-9]+]]:gpr64 = SUBSXrr %a, %cst, implicit-def $nzcv
+ ; CHECK: %select2:gpr64 = CSELXr %b, %c, 13, implicit $nzcv
+ ; CHECK: %add:gpr64 = ADDXrr %select1, %select2
+ ; CHECK: $x0 = COPY %add
+ ; CHECK: RET_ReallyLR implicit $x0
+ %a:gpr(s64) = COPY $x0
+ %b:gpr(s64) = COPY $x1
+ %c:gpr(s64) = COPY $x2
+ %cst:gpr(s64) = G_CONSTANT i64 922337203685477580
+ %cmp:gpr(s32) = G_ICMP intpred(sle), %a(s64), %cst
+ %trunc_cmp:gpr(s1) = G_TRUNC %cmp(s32)
+ %select1:gpr(s64) = G_SELECT %trunc_cmp(s1), %a, %b
+ %select2:gpr(s64) = G_SELECT %trunc_cmp(s1), %b, %c
+ %add:gpr(s64) = G_ADD %select1, %select2
+ $x0 = COPY %add(s64)
+ RET_ReallyLR implicit $x0
...