[llvm] 9695027 - [PowerPC] address post-commit comments for D106555; NFC
Chen Zheng via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 4 22:31:34 PDT 2021
Author: Chen Zheng
Date: 2021-11-05T05:30:53Z
New Revision: 96950270669acd3c342a266562ff3a41464cc0a0
URL: https://github.com/llvm/llvm-project/commit/96950270669acd3c342a266562ff3a41464cc0a0
DIFF: https://github.com/llvm/llvm-project/commit/96950270669acd3c342a266562ff3a41464cc0a0.diff
LOG: [PowerPC] address post-commit comments for D106555; NFC
Address nemanjai's post-commit comments.
Added:
Modified:
llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index e192efd9dff21..d63044c9760d6 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -5826,66 +5826,67 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
}
}
case PPCISD::LD_SPLAT: {
- // For v16i8 and v8i16, if target has no direct move, we can still handle
- // this without using stack.
- if (Subtarget->hasAltivec() && !Subtarget->hasDirectMove()) {
- SDValue ZeroReg =
- CurDAG->getRegister(Subtarget->isPPC64() ? PPC::ZERO8 : PPC::ZERO,
- Subtarget->isPPC64() ? MVT::i64 : MVT::i32);
- unsigned LIOpcode = Subtarget->isPPC64() ? PPC::LI8 : PPC::LI;
- EVT Type = N->getValueType(0);
- if (Type == MVT::v16i8 || Type == MVT::v8i16) {
- // v16i8 LD_SPLAT addr
- // ======>
- // Mask = LVSR/LVSL 0, addr
- // LoadLow = LXV 0, addr
- // Perm = VPERM LoadLow, LoadLow, Mask
- // Splat = VSPLTB 15/0, Perm
- //
- // v8i16 LD_SPLAT addr
- // ======>
- // Mask = LVSR/LVSL 0, addr
- // LoadLow = LXV 0, addr
- // LoadHigh = LXV (LI, 1), addr
- // Perm = VPERM LoadLow, LoadHigh, Mask
- // Splat = VSPLTH 7/0, Perm
- unsigned SplatOp = (Type == MVT::v16i8) ? PPC::VSPLTB : PPC::VSPLTH;
- unsigned SplatElemIndex =
- Subtarget->isLittleEndian() ? ((Type == MVT::v16i8) ? 15 : 7) : 0;
-
- SDNode *Mask = CurDAG->getMachineNode(
- Subtarget->isLittleEndian() ? PPC::LVSR : PPC::LVSL, dl, Type,
- ZeroReg, N->getOperand(1));
-
- SDNode *LoadLow = CurDAG->getMachineNode(
- PPC::LVX, dl, MVT::v16i8, MVT::Other,
- {ZeroReg, N->getOperand(1), N->getOperand(0)});
-
- SDNode *LoadHigh = LoadLow;
- if (Type == MVT::v8i16) {
- LoadHigh = CurDAG->getMachineNode(
- PPC::LVX, dl, MVT::v16i8, MVT::Other,
- {SDValue(CurDAG->getMachineNode(
- LIOpcode, dl, MVT::i32,
- CurDAG->getTargetConstant(1, dl, MVT::i8)),
- 0),
- N->getOperand(1), SDValue(LoadLow, 1)});
- }
+ // Here we want to handle splat load for type v16i8 and v8i16 when there is
+ // no direct move, we don't need to use stack for this case. If target has
+ // direct move, we should be able to get the best selection in the .td file.
+ if (!Subtarget->hasAltivec() || Subtarget->hasDirectMove())
+ break;
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(LoadHigh, 1));
- transferMemOperands(N, LoadHigh);
+ EVT Type = N->getValueType(0);
+ if (Type != MVT::v16i8 && Type != MVT::v8i16)
+ break;
- SDNode *Perm =
- CurDAG->getMachineNode(PPC::VPERM, dl, Type, SDValue(LoadLow, 0),
- SDValue(LoadHigh, 0), SDValue(Mask, 0));
- CurDAG->SelectNodeTo(
- N, SplatOp, Type,
- CurDAG->getTargetConstant(SplatElemIndex, dl, MVT::i8),
- SDValue(Perm, 0));
- return;
- }
- }
- break;
+ SDValue ZeroReg =
+ CurDAG->getRegister(Subtarget->isPPC64() ? PPC::ZERO8 : PPC::ZERO,
+ Subtarget->isPPC64() ? MVT::i64 : MVT::i32);
+ unsigned LIOpcode = Subtarget->isPPC64() ? PPC::LI8 : PPC::LI;
+ // v16i8 LD_SPLAT addr
+ // ======>
+ // Mask = LVSR/LVSL 0, addr
+ // LoadLow = LXV 0, addr
+ // Perm = VPERM LoadLow, LoadLow, Mask
+ // Splat = VSPLTB 15/0, Perm
+ //
+ // v8i16 LD_SPLAT addr
+ // ======>
+ // Mask = LVSR/LVSL 0, addr
+ // LoadLow = LXV 0, addr
+ // LoadHigh = LXV (LI, 1), addr
+ // Perm = VPERM LoadLow, LoadHigh, Mask
+ // Splat = VSPLTH 7/0, Perm
+ unsigned SplatOp = (Type == MVT::v16i8) ? PPC::VSPLTB : PPC::VSPLTH;
+ unsigned SplatElemIndex =
+ Subtarget->isLittleEndian() ? ((Type == MVT::v16i8) ? 15 : 7) : 0;
+
+ SDNode *Mask = CurDAG->getMachineNode(
+ Subtarget->isLittleEndian() ? PPC::LVSR : PPC::LVSL, dl, Type, ZeroReg,
+ N->getOperand(1));
+
+ SDNode *LoadLow =
+ CurDAG->getMachineNode(PPC::LVX, dl, MVT::v16i8, MVT::Other,
+ {ZeroReg, N->getOperand(1), N->getOperand(0)});
+
+ SDNode *LoadHigh = LoadLow;
+ if (Type == MVT::v8i16) {
+ LoadHigh = CurDAG->getMachineNode(
+ PPC::LVX, dl, MVT::v16i8, MVT::Other,
+ {SDValue(CurDAG->getMachineNode(
+ LIOpcode, dl, MVT::i32,
+ CurDAG->getTargetConstant(1, dl, MVT::i8)),
+ 0),
+ N->getOperand(1), SDValue(LoadLow, 1)});
+ }
+
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(LoadHigh, 1));
+ transferMemOperands(N, LoadHigh);
+
+ SDNode *Perm =
+ CurDAG->getMachineNode(PPC::VPERM, dl, Type, SDValue(LoadLow, 0),
+ SDValue(LoadHigh, 0), SDValue(Mask, 0));
+ CurDAG->SelectNodeTo(N, SplatOp, Type,
+ CurDAG->getTargetConstant(SplatElemIndex, dl, MVT::i8),
+ SDValue(Perm, 0));
+ return;
}
}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 1ca59feef42e8..0f9b70406c369 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9079,8 +9079,8 @@ static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
return true;
if (Ty == MVT::v2i64) {
- // check the extend type if the input is i32 while the output vector type is
- // v2i64.
+ // Check the extend type, when the input type is i32, and the output vector
+ // type is v2i64.
if (cast<LoadSDNode>(Op.getOperand(0))->getMemoryVT() == MVT::i32) {
if (ISD::isZEXTLoad(InputNode))
Opcode = PPCISD::ZEXT_LD_SPLAT;
@@ -9164,8 +9164,17 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
const SDValue *InputLoad = &Op.getOperand(0);
LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
- unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits() *
- ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
+ // If the input load is an extending load, it will be an i32 -> i64
+ // extending load and isValidSplatLoad() will update NewOpcode.
+ unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
+ unsigned ElementSize =
+ MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
+
+ assert(((ElementSize == 2 * MemorySize)
+ ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
+ NewOpcode == PPCISD::SEXT_LD_SPLAT)
+ : (NewOpcode == PPCISD::LD_SPLAT)) &&
+ "Unmatched element size and opcode!\n");
// Checking for a single use of this load, we have to check for vector
// width (128 bits) / ElementSize uses (since each operand of the
@@ -9175,7 +9184,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
if (BVInOp.isUndef())
NumUsesOfInputLD--;
- // Execlude somes case where LD_SPLAT is worse than scalar_to_vector:
+ // Exclude somes case where LD_SPLAT is worse than scalar_to_vector:
// Below cases should also happen for "lfiwzx/lfiwax + LE target + index
// 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
// 15", but funciton IsValidSplatLoad() now will only return true when
@@ -9193,22 +9202,13 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
Subtarget.hasLFIWAX()))
return SDValue();
- // case 2 - lxvrhx
- // 2.1: load result is i16;
- // 2.2: build a v8i16 vector with above loaded value;
+ // case 2 - lxvr[hb]x
+ // 2.1: load result is at most i16;
+ // 2.2: build a vector with above loaded value;
// 2.3: the vector has only one value at index 0, others are all undef;
- // 2.4: on LE target, so that lxvrhx does not need any permute.
- if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
- Subtarget.isISA3_1() && Op->getValueType(0) == MVT::v16i8)
- return SDValue();
-
- // case 3 - lxvrbx
- // 3.1: load result is i8;
- // 3.2: build a v16i8 vector with above loaded value;
- // 3.3: the vector has only one value at index 0, others are all undef;
- // 3.4: on LE target, so that lxvrbx does not need any permute.
+ // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
- Subtarget.isISA3_1() && Op->getValueType(0) == MVT::v8i16)
+ Subtarget.isISA3_1() && ElementSize <= 16)
return SDValue();
assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
More information about the llvm-commits mailing list