[clang-tools-extra] [llvm] [clang] [PowerPC] Check value uses in ValueBit tracking (PR #66040)
Qiu Chaofan via cfe-commits
cfe-commits at lists.llvm.org
Mon Jan 15 19:15:15 PST 2024
https://github.com/ecnelises updated https://github.com/llvm/llvm-project/pull/66040
From ebaafdd6d45bb62b1847e60df627dfd96971a22c Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan at cn.ibm.com>
Date: Tue, 12 Sep 2023 10:39:55 +0800
Subject: [PATCH] [PowerPC] Check value uses in ValueBit tracking
---
llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 162 +++++++++++-------
llvm/test/CodeGen/PowerPC/int128_ldst.ll | 18 +-
.../PowerPC/loop-instr-form-prepare.ll | 6 +-
llvm/test/CodeGen/PowerPC/prefer-dqform.ll | 4 +-
llvm/test/CodeGen/PowerPC/rldimi.ll | 19 +-
5 files changed, 117 insertions(+), 92 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index b57d185bb638b8c..8af50b10d3c7e1d 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -1630,30 +1630,41 @@ class BitPermutationSelector {
bool &Interesting = ValueEntry->first;
SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
Bits.resize(NumBits);
+ SDValue LHS = V.getNumOperands() > 0 ? V.getOperand(0) : SDValue();
+ SDValue RHS = V.getNumOperands() > 1 ? V.getOperand(1) : SDValue();
switch (V.getOpcode()) {
default: break;
case ISD::ROTL:
- if (isa<ConstantSDNode>(V.getOperand(1))) {
+ if (isa<ConstantSDNode>(RHS)) {
unsigned RotAmt = V.getConstantOperandVal(1);
- const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
-
- for (unsigned i = 0; i < NumBits; ++i)
- Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
+ if (LHS.hasOneUse()) {
+ const auto &LHSBits = *getValueBits(LHS, NumBits).second;
+ for (unsigned i = 0; i < NumBits; ++i)
+ Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
+ } else {
+ for (unsigned i = 0; i < NumBits; ++i)
+ Bits[i] =
+ ValueBit(LHS, i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt);
+ }
return std::make_pair(Interesting = true, &Bits);
}
break;
case ISD::SHL:
case PPCISD::SHL:
- if (isa<ConstantSDNode>(V.getOperand(1))) {
+ if (isa<ConstantSDNode>(RHS)) {
unsigned ShiftAmt = V.getConstantOperandVal(1);
- const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
-
- for (unsigned i = ShiftAmt; i < NumBits; ++i)
- Bits[i] = LHSBits[i - ShiftAmt];
+ if (LHS.hasOneUse()) {
+ const auto &LHSBits = *getValueBits(LHS, NumBits).second;
+ for (unsigned i = ShiftAmt; i < NumBits; ++i)
+ Bits[i] = LHSBits[i - ShiftAmt];
+ } else {
+ for (unsigned i = ShiftAmt; i < NumBits; ++i)
+ Bits[i] = ValueBit(LHS, i - ShiftAmt);
+ }
for (unsigned i = 0; i < ShiftAmt; ++i)
Bits[i] = ValueBit(ValueBit::ConstZero);
@@ -1663,13 +1674,17 @@ class BitPermutationSelector {
break;
case ISD::SRL:
case PPCISD::SRL:
- if (isa<ConstantSDNode>(V.getOperand(1))) {
+ if (isa<ConstantSDNode>(RHS)) {
unsigned ShiftAmt = V.getConstantOperandVal(1);
- const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
-
- for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
- Bits[i] = LHSBits[i + ShiftAmt];
+ if (LHS.hasOneUse()) {
+ const auto &LHSBits = *getValueBits(LHS, NumBits).second;
+ for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
+ Bits[i] = LHSBits[i + ShiftAmt];
+ } else {
+ for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
+ Bits[i] = ValueBit(LHS, i + ShiftAmt);
+ }
for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
Bits[i] = ValueBit(ValueBit::ConstZero);
@@ -1678,23 +1693,27 @@ class BitPermutationSelector {
}
break;
case ISD::AND:
- if (isa<ConstantSDNode>(V.getOperand(1))) {
+ if (isa<ConstantSDNode>(RHS)) {
uint64_t Mask = V.getConstantOperandVal(1);
- const SmallVector<ValueBit, 64> *LHSBits;
+ const SmallVector<ValueBit, 64> *LHSBits = nullptr;
// Mark this as interesting, only if the LHS was also interesting. This
// prevents the overall procedure from matching a single immediate 'and'
// (which is non-optimal because such an and might be folded with other
// things if we don't select it here).
- std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);
+ if (LHS.hasOneUse())
+ std::tie(Interesting, LHSBits) = getValueBits(LHS, NumBits);
for (unsigned i = 0; i < NumBits; ++i)
- if (((Mask >> i) & 1) == 1)
- Bits[i] = (*LHSBits)[i];
- else {
+ if (((Mask >> i) & 1) == 1) {
+ if (LHS.hasOneUse())
+ Bits[i] = (*LHSBits)[i];
+ else
+ Bits[i] = ValueBit(LHS, i);
+ } else {
// AND instruction masks this bit. If the input is already zero,
// we have nothing to do here. Otherwise, make the bit ConstZero.
- if ((*LHSBits)[i].isZero())
+ if (LHS.hasOneUse() && (*LHSBits)[i].isZero())
Bits[i] = (*LHSBits)[i];
else
Bits[i] = ValueBit(ValueBit::ConstZero);
@@ -1704,34 +1723,44 @@ class BitPermutationSelector {
}
break;
case ISD::OR: {
- const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
- const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
+ const auto *LHSBits =
+ LHS.hasOneUse() ? getValueBits(LHS, NumBits).second : nullptr;
+ const auto *RHSBits =
+ RHS.hasOneUse() ? getValueBits(RHS, NumBits).second : nullptr;
bool AllDisjoint = true;
SDValue LastVal = SDValue();
unsigned LastIdx = 0;
for (unsigned i = 0; i < NumBits; ++i) {
- if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
+ if (LHSBits && RHSBits && (*LHSBits)[i].isZero() &&
+ (*RHSBits)[i].isZero()) {
// If both inputs are known to be zero and one is ConstZero and
// another is VariableKnownToBeZero, we can select whichever
// we like. To minimize the number of bit groups, we select
// VariableKnownToBeZero if this bit is the next bit of the same
// input variable from the previous bit. Otherwise, we select
// ConstZero.
- if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
- LHSBits[i].getValueBitIndex() == LastIdx + 1)
- Bits[i] = LHSBits[i];
- else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
- RHSBits[i].getValueBitIndex() == LastIdx + 1)
- Bits[i] = RHSBits[i];
+ const auto &LBits = *LHSBits;
+ const auto &RBits = *RHSBits;
+ if (LBits[i].hasValue() && LBits[i].getValue() == LastVal &&
+ LBits[i].getValueBitIndex() == LastIdx + 1)
+ Bits[i] = LBits[i];
+ else if (RBits[i].hasValue() && RBits[i].getValue() == LastVal &&
+ RBits[i].getValueBitIndex() == LastIdx + 1)
+ Bits[i] = RBits[i];
else
Bits[i] = ValueBit(ValueBit::ConstZero);
- }
- else if (LHSBits[i].isZero())
- Bits[i] = RHSBits[i];
- else if (RHSBits[i].isZero())
- Bits[i] = LHSBits[i];
- else {
+ } else if (LHSBits && (*LHSBits)[i].isZero()) {
+ if (RHSBits)
+ Bits[i] = (*RHSBits)[i];
+ else
+ Bits[i] = ValueBit(RHS, i);
+ } else if (RHSBits && (*RHSBits)[i].isZero()) {
+ if (LHSBits)
+ Bits[i] = (*LHSBits)[i];
+ else
+ Bits[i] = ValueBit(LHS, i);
+ } else {
AllDisjoint = false;
break;
}
@@ -1739,9 +1768,9 @@ class BitPermutationSelector {
if (Bits[i].hasValue()) {
LastVal = Bits[i].getValue();
LastIdx = Bits[i].getValueBitIndex();
- }
- else {
- if (LastVal) LastVal = SDValue();
+ } else {
+ if (LastVal)
+ LastVal = SDValue();
LastIdx = 0;
}
}
@@ -1753,17 +1782,19 @@ class BitPermutationSelector {
}
case ISD::ZERO_EXTEND: {
// We support only the case with zero extension from i32 to i64 so far.
- if (V.getValueType() != MVT::i64 ||
- V.getOperand(0).getValueType() != MVT::i32)
+ if (V.getValueType() != MVT::i64 || LHS.getValueType() != MVT::i32)
break;
- const SmallVector<ValueBit, 64> *LHSBits;
const unsigned NumOperandBits = 32;
- std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
- NumOperandBits);
-
- for (unsigned i = 0; i < NumOperandBits; ++i)
- Bits[i] = (*LHSBits)[i];
+ if (LHS.hasOneUse()) {
+ const SmallVector<ValueBit, 64> *LHSBits;
+ std::tie(Interesting, LHSBits) = getValueBits(LHS, NumOperandBits);
+ for (unsigned i = 0; i < NumOperandBits; ++i)
+ Bits[i] = (*LHSBits)[i];
+ } else {
+ for (unsigned i = 0; i < NumOperandBits; ++i)
+ Bits[i] = ValueBit(LHS, i);
+ }
for (unsigned i = NumOperandBits; i < NumBits; ++i)
Bits[i] = ValueBit(ValueBit::ConstZero);
@@ -1771,15 +1802,14 @@ class BitPermutationSelector {
return std::make_pair(Interesting, &Bits);
}
case ISD::TRUNCATE: {
- EVT FromType = V.getOperand(0).getValueType();
+ EVT FromType = LHS.getValueType();
EVT ToType = V.getValueType();
// We support only the case with truncate from i64 to i32.
- if (FromType != MVT::i64 || ToType != MVT::i32)
+ if (FromType != MVT::i64 || ToType != MVT::i32 || !LHS.hasOneUse())
break;
const unsigned NumAllBits = FromType.getSizeInBits();
SmallVector<ValueBit, 64> *InBits;
- std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
- NumAllBits);
+ std::tie(Interesting, InBits) = getValueBits(LHS, NumAllBits);
const unsigned NumValidBits = ToType.getSizeInBits();
// A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.
@@ -1802,22 +1832,28 @@ class BitPermutationSelector {
// For AssertZext, we look through the operand and
// mark the bits known to be zero.
const SmallVector<ValueBit, 64> *LHSBits;
- std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
- NumBits);
- EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
+ EVT FromType = cast<VTSDNode>(RHS)->getVT();
const unsigned NumValidBits = FromType.getSizeInBits();
- for (unsigned i = 0; i < NumValidBits; ++i)
- Bits[i] = (*LHSBits)[i];
// These bits are known to be zero but the AssertZext may be from a value
// that already has some constant zero bits (i.e. from a masking and).
- for (unsigned i = NumValidBits; i < NumBits; ++i)
- Bits[i] = (*LHSBits)[i].hasValue()
- ? ValueBit((*LHSBits)[i].getValue(),
- (*LHSBits)[i].getValueBitIndex(),
- ValueBit::VariableKnownToBeZero)
- : ValueBit(ValueBit::ConstZero);
+ if (LHS.hasOneUse()) {
+ std::tie(Interesting, LHSBits) = getValueBits(LHS, NumBits);
+ for (unsigned i = 0; i < NumValidBits; ++i)
+ Bits[i] = (*LHSBits)[i];
+ for (unsigned i = NumValidBits; i < NumBits; ++i)
+ Bits[i] = (*LHSBits)[i].hasValue()
+ ? ValueBit((*LHSBits)[i].getValue(),
+ (*LHSBits)[i].getValueBitIndex(),
+ ValueBit::VariableKnownToBeZero)
+ : ValueBit(ValueBit::ConstZero);
+ } else {
+ for (unsigned i = 0; i < NumValidBits; ++i)
+ Bits[i] = ValueBit(LHS, i);
+ for (unsigned i = NumValidBits; i < NumBits; ++i)
+ Bits[i] = ValueBit(LHS, i, ValueBit::VariableKnownToBeZero);
+ }
return std::make_pair(Interesting, &Bits);
}
diff --git a/llvm/test/CodeGen/PowerPC/int128_ldst.ll b/llvm/test/CodeGen/PowerPC/int128_ldst.ll
index 7f5f6a181c1b01c..b9afca4a892fe30 100644
--- a/llvm/test/CodeGen/PowerPC/int128_ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/int128_ldst.ll
@@ -208,11 +208,10 @@ entry:
define dso_local i128 @ld_or2___int128___int128(i64 %ptr, i8 zeroext %off) {
; CHECK-LABEL: ld_or2___int128___int128:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: rldicr 5, 3, 0, 51
-; CHECK-NEXT: rotldi 6, 3, 52
-; CHECK-NEXT: ldx 3, 5, 4
-; CHECK-NEXT: rldimi 4, 6, 12, 0
-; CHECK-NEXT: ld 4, 8(4)
+; CHECK-NEXT: rldicr 3, 3, 0, 51
+; CHECK-NEXT: or 5, 3, 4
+; CHECK-NEXT: ldx 3, 3, 4
+; CHECK-NEXT: ld 4, 8(5)
; CHECK-NEXT: blr
entry:
%and = and i64 %ptr, -4096
@@ -740,11 +739,10 @@ entry:
define dso_local void @st_or2__int128___int128(i64 %ptr, i8 zeroext %off, i128 %str) {
; CHECK-LABEL: st_or2__int128___int128:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: rldicr 7, 3, 0, 51
-; CHECK-NEXT: rotldi 3, 3, 52
-; CHECK-NEXT: stdx 5, 7, 4
-; CHECK-NEXT: rldimi 4, 3, 12, 0
-; CHECK-NEXT: std 6, 8(4)
+; CHECK-NEXT: rldicr 3, 3, 0, 51
+; CHECK-NEXT: or 7, 3, 4
+; CHECK-NEXT: stdx 5, 3, 4
+; CHECK-NEXT: std 6, 8(7)
; CHECK-NEXT: blr
entry:
%and = and i64 %ptr, -4096
diff --git a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll
index 900069c6216bf6d..057af49c6a69e02 100644
--- a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll
+++ b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll
@@ -639,9 +639,9 @@ define i64 @test_ds_cross_basic_blocks(ptr %arg, i32 signext %arg1) {
; CHECK-NEXT: #
; CHECK-NEXT: lbzu r0, 1(r5)
; CHECK-NEXT: mulli r29, r0, 171
-; CHECK-NEXT: rlwinm r28, r29, 24, 8, 30
-; CHECK-NEXT: srwi r29, r29, 9
-; CHECK-NEXT: add r29, r29, r28
+; CHECK-NEXT: srwi r28, r29, 9
+; CHECK-NEXT: rlwinm r29, r29, 24, 8, 30
+; CHECK-NEXT: add r29, r28, r29
; CHECK-NEXT: sub r0, r0, r29
; CHECK-NEXT: clrlwi r0, r0, 24
; CHECK-NEXT: cmplwi r0, 1
diff --git a/llvm/test/CodeGen/PowerPC/prefer-dqform.ll b/llvm/test/CodeGen/PowerPC/prefer-dqform.ll
index 912a74ba8df8fb5..4e57f2f3926a11c 100644
--- a/llvm/test/CodeGen/PowerPC/prefer-dqform.ll
+++ b/llvm/test/CodeGen/PowerPC/prefer-dqform.ll
@@ -35,7 +35,7 @@ define void @test(ptr dereferenceable(4) %.ial, ptr noalias dereferenceable(4) %
; CHECK-P9-NEXT: addi r8, r5, -8
; CHECK-P9-NEXT: lwz r5, 0(r7)
; CHECK-P9-NEXT: extsw r7, r4
-; CHECK-P9-NEXT: rldic r4, r3, 3, 29
+; CHECK-P9-NEXT: sldi r4, r3, 3
; CHECK-P9-NEXT: sub r3, r7, r3
; CHECK-P9-NEXT: addi r10, r4, 8
; CHECK-P9-NEXT: lxvdsx vs0, 0, r8
@@ -87,7 +87,7 @@ define void @test(ptr dereferenceable(4) %.ial, ptr noalias dereferenceable(4) %
; CHECK-P10-NEXT: addi r8, r5, -8
; CHECK-P10-NEXT: lwz r5, 0(r7)
; CHECK-P10-NEXT: extsw r7, r4
-; CHECK-P10-NEXT: rldic r4, r3, 3, 29
+; CHECK-P10-NEXT: sldi r4, r3, 3
; CHECK-P10-NEXT: addi r10, r4, 8
; CHECK-P10-NEXT: sub r3, r7, r3
; CHECK-P10-NEXT: lxvdsx vs0, 0, r8
diff --git a/llvm/test/CodeGen/PowerPC/rldimi.ll b/llvm/test/CodeGen/PowerPC/rldimi.ll
index 4e26ddfc37f99e3..a37bf852499cbba 100644
--- a/llvm/test/CodeGen/PowerPC/rldimi.ll
+++ b/llvm/test/CodeGen/PowerPC/rldimi.ll
@@ -17,11 +17,8 @@ entry:
define i64 @rldimi2(i64 %a) {
; CHECK-LABEL: rldimi2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: mr 4, 3
-; CHECK-NEXT: rlwimi 4, 3, 8, 16, 23
-; CHECK-NEXT: rlwimi 4, 3, 16, 8, 15
-; CHECK-NEXT: rldimi 4, 3, 24, 0
-; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: rldimi 3, 3, 8, 0
+; CHECK-NEXT: rldimi 3, 3, 16, 0
; CHECK-NEXT: blr
entry:
%x0 = shl i64 %a, 8
@@ -36,15 +33,9 @@ entry:
define i64 @rldimi3(i64 %a) {
; CHECK-LABEL: rldimi3:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: rotldi 4, 3, 32
-; CHECK-NEXT: rlwimi 4, 3, 0, 24, 31
-; CHECK-NEXT: rlwimi 4, 3, 8, 16, 23
-; CHECK-NEXT: rlwimi 4, 3, 16, 8, 15
-; CHECK-NEXT: rlwimi 4, 3, 24, 0, 7
-; CHECK-NEXT: rldimi 4, 3, 40, 16
-; CHECK-NEXT: rldimi 4, 3, 48, 8
-; CHECK-NEXT: rldimi 4, 3, 56, 0
-; CHECK-NEXT: mr 3, 4
+; CHECK-NEXT: rldimi 3, 3, 8, 0
+; CHECK-NEXT: rldimi 3, 3, 16, 0
+; CHECK-NEXT: rlwinm 3, 3, 0, 1, 0
; CHECK-NEXT: blr
entry:
%0 = shl i64 %a, 8
More information about the cfe-commits mailing list