[llvm] ae597a7 - [AArch64][GlobalISel] Do not modify predicate when optimizing G_ICMP
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Tue May 26 17:51:51 PDT 2020
Author: Jessica Paquette
Date: 2020-05-26T17:51:08-07:00
New Revision: ae597a771ed4d7530e2ef232d02a253067e3312f
URL: https://github.com/llvm/llvm-project/commit/ae597a771ed4d7530e2ef232d02a253067e3312f
DIFF: https://github.com/llvm/llvm-project/commit/ae597a771ed4d7530e2ef232d02a253067e3312f.diff
LOG: [AArch64][GlobalISel] Do not modify predicate when optimizing G_ICMP
This fixes a bug in `tryOptArithImmedIntegerCompare`.
It is unsafe to update the predicate on a MachineOperand when optimizing a
G_ICMP, because the G_ICMP may be used in more than one place.
For example, when we are optimizing G_SELECT, we allow compares which are used
in more than one G_SELECT. If we modify the G_ICMP, then we'll break one of
the G_SELECTs.
Since the compare is being produced to either
1) Select a G_ICMP
2) Fold a G_ICMP into an instruction when profitable
there's no reason to actually modify the original G_ICMP. Any predicate change
only needs to apply to the specific compare being emitted.
Instead, pass a `CmpInst::Predicate` by reference to
`tryOptArithImmedIntegerCompare`, so that the optimization updates a local copy
of the predicate rather than the shared MachineOperand.
Differential Revision: https://reviews.llvm.org/D80585
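For illustration, here is a standalone C++ sketch of that pattern (hypothetical
names like `tryOptImmedCompare` and a simplified 12-bit immediate check; the
real logic is `tryOptArithImmedIntegerCompare` in the diff below). Only the
local predicate copy is rewritten, and only once the adjusted immediate is
known to be usable, so a compare shared by several G_SELECTs is never
corrupted:

// Standalone sketch (not the real LLVM code) of the pattern this patch adopts:
// the caller copies the predicate out of the shared operand, and the helper
// mutates only that local copy, so other users of the same compare are safe.
#include <cstdint>
#include <iostream>
#include <optional>

enum class Pred { SLT, SLE, SGT, SGE }; // stand-in for CmpInst::Predicate

// Hypothetical analogue of tryOptArithImmedIntegerCompare: it may adjust the
// immediate and the predicate together, but only through the reference it was
// given, never by writing back to a (possibly shared) G_ICMP operand.
std::optional<int64_t> tryOptImmedCompare(int64_t C, Pred &P) {
  auto Encodable = [](int64_t V) { return V >= 0 && V < 4096; };
  if (Encodable(C))
    return C;                       // already fits: predicate left untouched
  Pred NewP;
  switch (P) {
  case Pred::SLT:                   // x <  c  is  x <= c-1  (c != INT64_MIN)
    if (C == INT64_MIN) return std::nullopt;
    NewP = Pred::SLE; C -= 1; break;
  case Pred::SLE:                   // x <= c  is  x <  c+1  (c != INT64_MAX)
    if (C == INT64_MAX) return std::nullopt;
    NewP = Pred::SLT; C += 1; break;
  default:
    return std::nullopt;            // no rewrite known: predicate untouched
  }
  if (!Encodable(C))
    return std::nullopt;            // still does not fit: predicate untouched
  P = NewP;                         // commit only once the rewrite is usable
  return C;
}

int main() {
  Pred Shared = Pred::SLT;          // imagine this backs a G_ICMP with two users
  Pred Local = Shared;              // each selection works on its own copy
  if (auto Imm = tryOptImmedCompare(4096, Local))
    std::cout << "emit compare against #" << *Imm << " using the local copy\n";
  // Shared is still SLT here, so a second G_SELECT that reuses the same
  // G_ICMP sees the original predicate.
  std::cout << (Shared == Pred::SLT) << "\n";
}

In the actual patch, emitIntegerCompare makes exactly this kind of local copy
(`CmpInst::Predicate P`) from the predicate operand and returns the predicate
it ultimately used alongside the compare instruction.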
Added:
Modified:
llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
llvm/test/CodeGen/AArch64/GlobalISel/select-arith-immed-compare.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 57eaf140a638..1b321260ed02 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -155,7 +155,9 @@ class AArch64InstructionSelector : public InstructionSelector {
// Emit an integer compare between LHS and RHS, which checks for Predicate.
//
- // This may update Predicate when emitting the compare.
+ // This returns the produced compare instruction, and the predicate which
+ // was ultimately used in the compare. The predicate may differ from what
+ // is passed in \p Predicate due to optimization.
std::pair<MachineInstr *, CmpInst::Predicate>
emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
MachineOperand &Predicate,
@@ -307,7 +309,7 @@ class AArch64InstructionSelector : public InstructionSelector {
MachineIRBuilder &MIRBuilder) const;
MachineInstr *tryOptArithImmedIntegerCompare(MachineOperand &LHS,
MachineOperand &RHS,
- MachineOperand &Predicate,
+ CmpInst::Predicate &Predicate,
MachineIRBuilder &MIB) const;
MachineInstr *tryOptArithShiftedCompare(MachineOperand &LHS,
MachineOperand &RHS,
@@ -3685,13 +3687,16 @@ AArch64InstructionSelector::emitIntegerCompare(
MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
MachineIRBuilder &MIRBuilder) const {
assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
+ assert(Predicate.isPredicate() && "Expected predicate?");
MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
+ CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
+
// Fold the compare if possible.
MachineInstr *FoldCmp =
tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
if (FoldCmp)
- return {FoldCmp, (CmpInst::Predicate)Predicate.getPredicate()};
+ return {FoldCmp, P};
// Can't fold into a CMN. Just emit a normal compare.
unsigned CmpOpc = 0;
@@ -3712,21 +3717,21 @@ AArch64InstructionSelector::emitIntegerCompare(
// Try to match immediate forms.
MachineInstr *ImmedCmp =
- tryOptArithImmedIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
+ tryOptArithImmedIntegerCompare(LHS, RHS, P, MIRBuilder);
if (ImmedCmp)
- return {ImmedCmp, (CmpInst::Predicate)Predicate.getPredicate()};
+ return {ImmedCmp, P};
// If we don't have an immediate, we may have a shift which can be folded
// into the compare.
MachineInstr *ShiftedCmp = tryOptArithShiftedCompare(LHS, RHS, MIRBuilder);
if (ShiftedCmp)
- return {ShiftedCmp, (CmpInst::Predicate)Predicate.getPredicate()};
+ return {ShiftedCmp, P};
auto CmpMI =
MIRBuilder.buildInstr(CmpOpc, {ZReg}, {LHS.getReg(), RHS.getReg()});
// Make sure that we can constrain the compare that we emitted.
constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
- return {&*CmpMI, (CmpInst::Predicate)Predicate.getPredicate()};
+ return {&*CmpMI, P};
}
MachineInstr *AArch64InstructionSelector::emitVectorConcat(
@@ -4042,7 +4047,7 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
}
MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare(
- MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
+ MachineOperand &LHS, MachineOperand &RHS, CmpInst::Predicate &P,
MachineIRBuilder &MIB) const {
// Attempt to select the immediate form of an integer compare.
MachineRegisterInfo &MRI = *MIB.getMRI();
@@ -4051,7 +4056,6 @@ MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare(
unsigned Size = Ty.getSizeInBits();
assert((Size == 32 || Size == 64) &&
"Expected 32 bit or 64 bit compare only?");
- auto P = (CmpInst::Predicate)Predicate.getPredicate();
// Check if this is a case we can already handle.
InstructionSelector::ComplexRendererFns ImmFns;
@@ -4066,6 +4070,7 @@ MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare(
// We have a constant, but it doesn't fit. Try adjusting it by one and
// updating the predicate if possible.
uint64_t C = *MaybeImmed;
+ CmpInst::Predicate NewP;
switch (P) {
default:
return nullptr;
@@ -4080,7 +4085,7 @@ MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare(
if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
(Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
return nullptr;
- P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
+ NewP = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
C -= 1;
break;
case CmpInst::ICMP_ULT:
@@ -4093,7 +4098,7 @@ MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare(
// When c is not zero.
if (C == 0)
return nullptr;
- P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
+ NewP = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
C -= 1;
break;
case CmpInst::ICMP_SLE:
@@ -4107,7 +4112,7 @@ MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare(
if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
(Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
return nullptr;
- P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
+ NewP = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
C += 1;
break;
case CmpInst::ICMP_ULE:
@@ -4121,7 +4126,7 @@ MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare(
if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
(Size == 64 && C == UINT64_MAX))
return nullptr;
- P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
+ NewP = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
C += 1;
break;
}
@@ -4132,7 +4137,7 @@ MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare(
ImmFns = select12BitValueWithLeftShift(C);
if (!ImmFns)
return nullptr;
- Predicate.setPredicate(P);
+ P = NewP;
}
// At this point, we know we can select an immediate form. Go ahead and do
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-arith-immed-compare.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-arith-immed-compare.mir
index 59fcbd09c4c1..37d7ec60f553 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-arith-immed-compare.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-arith-immed-compare.mir
@@ -627,4 +627,82 @@ body: |
%3:gpr(s64) = G_AND %6, %5
$x0 = COPY %3(s64)
RET_ReallyLR implicit $x0
+
+...
+---
+name: more_than_one_use_select
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+
+ ; Both of these selects use the same compare.
+ ;
+ ; They should both be optimized in the same way, so the SUBS produced for
+ ; each CSEL should be the same.
+
+ ; CHECK-LABEL: name: more_than_one_use_select
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: %a:gpr64common = COPY $x0
+ ; CHECK: %b:gpr64 = COPY $x1
+ ; CHECK: %c:gpr64 = COPY $x2
+ ; CHECK: $xzr = SUBSXri %a, 0, 0, implicit-def $nzcv
+ ; CHECK: %select1:gpr64 = CSELXr %a, %b, 11, implicit $nzcv
+ ; CHECK: $xzr = SUBSXri %a, 0, 0, implicit-def $nzcv
+ ; CHECK: %select2:gpr64 = CSELXr %b, %c, 11, implicit $nzcv
+ ; CHECK: %add:gpr64 = ADDXrr %select1, %select2
+ ; CHECK: $x0 = COPY %add
+ ; CHECK: RET_ReallyLR implicit $x0
+ %a:gpr(s64) = COPY $x0
+ %b:gpr(s64) = COPY $x1
+ %c:gpr(s64) = COPY $x2
+ %cst:gpr(s64) = G_CONSTANT i64 -1
+ %cmp:gpr(s32) = G_ICMP intpred(sle), %a(s64), %cst
+ %trunc_cmp:gpr(s1) = G_TRUNC %cmp(s32)
+ %select1:gpr(s64) = G_SELECT %trunc_cmp(s1), %a, %b
+ %select2:gpr(s64) = G_SELECT %trunc_cmp(s1), %b, %c
+ %add:gpr(s64) = G_ADD %select1, %select2
+ $x0 = COPY %add(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: more_than_one_use_select_no_opt
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+
+ ; When we don't end up doing the optimization, we should not change the
+ ; predicate.
+ ;
+ ; In this case, the CSELXrs should both have predicate code 13.
+
+ ; CHECK-LABEL: name: more_than_one_use_select_no_opt
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: %a:gpr64 = COPY $x0
+ ; CHECK: %b:gpr64 = COPY $x1
+ ; CHECK: %c:gpr64 = COPY $x2
+ ; CHECK: %cst:gpr64 = MOVi64imm 922337203685477580
+ ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %a, %cst, implicit-def $nzcv
+ ; CHECK: %select1:gpr64 = CSELXr %a, %b, 13, implicit $nzcv
+ ; CHECK: [[SUBSXrr1:%[0-9]+]]:gpr64 = SUBSXrr %a, %cst, implicit-def $nzcv
+ ; CHECK: %select2:gpr64 = CSELXr %b, %c, 13, implicit $nzcv
+ ; CHECK: %add:gpr64 = ADDXrr %select1, %select2
+ ; CHECK: $x0 = COPY %add
+ ; CHECK: RET_ReallyLR implicit $x0
+ %a:gpr(s64) = COPY $x0
+ %b:gpr(s64) = COPY $x1
+ %c:gpr(s64) = COPY $x2
+ %cst:gpr(s64) = G_CONSTANT i64 922337203685477580
+ %cmp:gpr(s32) = G_ICMP intpred(sle), %a(s64), %cst
+ %trunc_cmp:gpr(s1) = G_TRUNC %cmp(s32)
+ %select1:gpr(s64) = G_SELECT %trunc_cmp(s1), %a, %b
+ %select2:gpr(s64) = G_SELECT %trunc_cmp(s1), %b, %c
+ %add:gpr(s64) = G_ADD %select1, %select2
+ $x0 = COPY %add(s64)
+ RET_ReallyLR implicit $x0
...