[llvm] [PowerPC] Emit lxvkq and vsrq instructions for build vector patterns (PR #157625)
Tony Varghese via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 9 11:48:16 PDT 2025
https://github.com/tonykuttai updated https://github.com/llvm/llvm-project/pull/157625
>From 85a8234c8f655b18592671affbbb9f80aac1dcd8 Mon Sep 17 00:00:00 2001
From: Tony Varghese <tony.varghese at ibm.com>
Date: Tue, 9 Sep 2025 14:57:54 +0000
Subject: [PATCH] Emit lxvkq and vsrq instructions for build vector patterns
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 134 ++++++++
llvm/lib/Target/PowerPC/PPCISelLowering.h | 3 +
.../CodeGen/PowerPC/lxvkq-vec-constant.ll | 307 ++++++++++++++++++
.../test/CodeGen/PowerPC/vector-reduce-add.ll | 17 +-
4 files changed, 452 insertions(+), 9 deletions(-)
create mode 100644 llvm/test/CodeGen/PowerPC/lxvkq-vec-constant.ll
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index fa104e4f69d7f..c76347d48bc62 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9679,6 +9679,13 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
+ // Recognize build vector patterns to emit VSX vector instructions
+ // instead of loading value from memory.
+ if (Subtarget.isISA3_1() && Subtarget.hasVSX()) {
+ if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG))
+ return VecPat;
+ }
+
if (Subtarget.hasP10Vector()) {
APInt BitMask(32, 0);
// If the value of the vector is all zeros or all ones,
@@ -15657,6 +15664,133 @@ combineElementTruncationToVectorTruncation(SDNode *N,
return SDValue();
}
+// LXVKQ instruction load VSX vector with a special quadword value
+// based on an immediate value. This helper method returns the details of the
+// match as a tuple of {LXVKQ unsigned IMM Value, right_shift_amount}
+// to help generate the LXVKQ instruction and the subsequent shift instruction
+// required to match the original build vector pattern.
+
+// LXVKQPattern: {LXVKQ unsigned IMM Value, right_shift_amount}
+using LXVKQPattern = std::tuple<uint32_t, uint8_t>;
+
+static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) {
+
+ static const auto BaseLXVKQPatterns = []() {
+ // LXVKQ instruction loads the Quadword value:
+ // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000
+ return std::array<std::pair<APInt, uint32_t>, 1>{
+ {{APInt(128, 0x8000000000000000ULL) << 64, 16}}};
+ }();
+
+ // Check for direct LXVKQ match (no shift needed)
+ for (const auto &[BasePattern, Uim] : BaseLXVKQPatterns) {
+ if (FullVal == BasePattern)
+ return std::make_tuple(Uim, uint8_t{0});
+ }
+
+ // Check if FullValue can be generated by (right) shifting a base pattern
+ for (const auto &[BasePattern, Uim] : BaseLXVKQPatterns) {
+ if (BasePattern.lshr(127) == FullVal)
+ return std::make_tuple(Uim, uint8_t{127});
+ }
+
+ return std::nullopt;
+}
+
+/// Combine vector loads to a single load by recognising patterns in the Build
+/// Vector. LXVKQ instruction load VSX vector with a special quadword value
+/// based on an immediate value.
+SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) &&
+ "Expected a BuildVectorSDNode in combineBVLoadsSpecialValue");
+
+ // This transformation is only supported if we are loading either a byte,
+ // halfword, word, or doubleword.
+ EVT VT = Op.getValueType();
+ if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
+ VT == MVT::v2i64))
+ return SDValue();
+
+ LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector ("
+ << VT.getEVTString() << "): ";
+ Op->dump());
+
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned ElemBits = VT.getScalarSizeInBits();
+
+ bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
+
+ // Check for Non-constant operand in the build vector.
+ for (const SDValue &Operand : Op.getNode()->op_values()) {
+ if (!isa<ConstantSDNode>(Operand))
+ return SDValue();
+ }
+
+ // Assemble build vector operands as a 128-bit register value
+ // We need to reconstruct what the 128-bit register pattern would be
+ // that produces this vector when interpreted with the current endianness
+ APInt FullVal = APInt::getZero(128);
+
+ for (unsigned Index = 0; Index < NumElems; ++Index) {
+ auto *C = cast<ConstantSDNode>(Op.getOperand(Index));
+
+ // Get element value as raw bits (zero-extended)
+ uint64_t ElemValue = C->getZExtValue();
+
+ // Mask to element size to ensure we only get the relevant bits
+ if (ElemBits < 64)
+ ElemValue &= ((1ULL << ElemBits) - 1);
+
+ // Calculate bit position for this element in the 128-bit register
+ unsigned BitPos =
+ (IsLittleEndian) ? (Index * ElemBits) : (128 - (Index + 1) * ElemBits);
+
+ // Create APInt for the element value and shift it to correct position
+ APInt ElemAPInt(128, ElemValue);
+ ElemAPInt <<= BitPos;
+
+ // Place the element value at the correct bit position
+ FullVal |= ElemAPInt;
+ }
+
+ if (auto UIMOpt = getPatternInfo(FullVal)) {
+ const auto &[Uim, ShiftAmount] = *UIMOpt;
+ SDLoc Dl(Op);
+
+ // Generate LXVKQ instruction if the shift amount is zero.
+ if (ShiftAmount == 0) {
+ SDValue UimVal = DAG.getTargetConstant(Uim, Dl, MVT::i32);
+ SDValue LxvkqInstr =
+ SDValue(DAG.getMachineNode(PPC::LXVKQ, Dl, VT, UimVal), 0);
+ LLVM_DEBUG(llvm::dbgs()
+ << "combineBVLoadsSpecialValue: Instruction Emitted ";
+ LxvkqInstr.dump());
+ return LxvkqInstr;
+ }
+
+ // The right shifted pattern can be constructed using a combination of
+ // XXSPLITIB and VSRQ instruction. VSRQ uses the shift amount from the lower
+ // 7 bits of byte 15. This can be specified using XXSPLITIB with immediate
+ // value 255.
+ SDValue ShiftAmountVec =
+ SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32,
+ DAG.getTargetConstant(255, Dl, MVT::i32)),
+ 0);
+ // Generate appropriate right shift instruction
+ SDValue ShiftVec = SDValue(
+ DAG.getMachineNode(PPC::VSRQ, Dl, VT, ShiftAmountVec, ShiftAmountVec),
+ 0);
+ LLVM_DEBUG(llvm::dbgs()
+ << "\n combineBVLoadsSpecialValue: Instruction Emitted ";
+ ShiftVec.dump());
+ return ShiftVec;
+ }
+ // No patterns matched for build vectors.
+ return SDValue();
+}
+
/// Reduce the number of loads when building a vector.
///
/// Building a vector out of multiple loads can be converted to a load
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 669430550f4e6..97382cd8f613c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1471,6 +1471,9 @@ namespace llvm {
combineElementTruncationToVectorTruncation(SDNode *N,
DAGCombinerInfo &DCI) const;
+ SDValue combineBVLoadsSpecialValue(SDValue Operand,
+ SelectionDAG &DAG) const;
+
/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be
/// handled by the VINSERTH instruction introduced in ISA 3.0. This is
/// essentially any shuffle of v8i16 vectors that just inserts one element
diff --git a/llvm/test/CodeGen/PowerPC/lxvkq-vec-constant.ll b/llvm/test/CodeGen/PowerPC/lxvkq-vec-constant.ll
new file mode 100644
index 0000000000000..0ee4524a6c68a
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/lxvkq-vec-constant.ll
@@ -0,0 +1,307 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \
+; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=POWERPC64-LE-10
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64-unknown-unknown \
+; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=POWERPC64-BE-10
+
+; Test LXVKQ instruction generation for special vector constants matching 128 bit patterns:
+; 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern)
+; 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern)
+
+; =============================================================================
+; v2i64 tests - MSB set pattern (0x8000_0000_0000_0000_0000_0000_0000_0000)
+; =============================================================================
+
+; Big-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <-9223372036854775808, 0>
+define dso_local noundef <2 x i64> @test_v2i64_msb_set_bigendian() local_unnamed_addr {
+; POWERPC64-LE-10-LABEL: test_v2i64_msb_set_bigendian:
+; POWERPC64-LE-10: # %bb.0: # %entry
+; POWERPC64-LE-10-NEXT: plxv v2, .LCPI0_0 at PCREL(0), 1
+; POWERPC64-LE-10-NEXT: blr
+;
+; POWERPC64-BE-10-LABEL: test_v2i64_msb_set_bigendian:
+; POWERPC64-BE-10: # %bb.0: # %entry
+; POWERPC64-BE-10-NEXT: lxvkq v2, 16
+; POWERPC64-BE-10-NEXT: blr
+entry:
+ ret <2 x i64> <i64 -9223372036854775808, i64 0>
+}
+
+; Little-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <0, -9223372036854775808>
+define dso_local noundef <2 x i64> @test_v2i64_msb_set_littleendian() local_unnamed_addr {
+; POWERPC64-LE-10-LABEL: test_v2i64_msb_set_littleendian:
+; POWERPC64-LE-10: # %bb.0: # %entry
+; POWERPC64-LE-10-NEXT: lxvkq v2, 16
+; POWERPC64-LE-10-NEXT: blr
+;
+; POWERPC64-BE-10-LABEL: test_v2i64_msb_set_littleendian:
+; POWERPC64-BE-10: # %bb.0: # %entry
+; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI1_0 at toc@ha
+; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI1_0 at toc@l
+; POWERPC64-BE-10-NEXT: lxv v2, 0(r3)
+; POWERPC64-BE-10-NEXT: blr
+entry:
+ ret <2 x i64> <i64 0, i64 -9223372036854775808>
+}
+
+; =============================================================================
+; v4i32 tests - MSB set pattern (0x8000_0000_0000_0000_0000_0000_0000_0000)
+; =============================================================================
+
+; Big-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <-2147483648, 0, 0, 0>
+define dso_local noundef <4 x i32> @test_v4i32_msb_set_bigendian() local_unnamed_addr {
+; POWERPC64-LE-10-LABEL: test_v4i32_msb_set_bigendian:
+; POWERPC64-LE-10: # %bb.0: # %entry
+; POWERPC64-LE-10-NEXT: plxv v2, .LCPI2_0 at PCREL(0), 1
+; POWERPC64-LE-10-NEXT: blr
+;
+; POWERPC64-BE-10-LABEL: test_v4i32_msb_set_bigendian:
+; POWERPC64-BE-10: # %bb.0: # %entry
+; POWERPC64-BE-10-NEXT: lxvkq v2, 16
+; POWERPC64-BE-10-NEXT: blr
+entry:
+ ret <4 x i32> <i32 -2147483648, i32 0, i32 0, i32 0>
+}
+
+; Little-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <0, 0, 0, -2147483648>
+define dso_local noundef <4 x i32> @test_v4i32_msb_set_littleendian() local_unnamed_addr {
+; POWERPC64-LE-10-LABEL: test_v4i32_msb_set_littleendian:
+; POWERPC64-LE-10: # %bb.0: # %entry
+; POWERPC64-LE-10-NEXT: lxvkq v2, 16
+; POWERPC64-LE-10-NEXT: blr
+;
+; POWERPC64-BE-10-LABEL: test_v4i32_msb_set_littleendian:
+; POWERPC64-BE-10: # %bb.0: # %entry
+; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI3_0 at toc@ha
+; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI3_0 at toc@l
+; POWERPC64-BE-10-NEXT: lxv v2, 0(r3)
+; POWERPC64-BE-10-NEXT: blr
+entry:
+ ret <4 x i32> <i32 0, i32 0, i32 0, i32 -2147483648>
+}
+
+; =============================================================================
+; v8i16 tests - MSB set pattern (0x8000_0000_0000_0000_0000_0000_0000_0000)
+; =============================================================================
+
+; Big-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <-32768, 0, 0, 0, 0, 0, 0, 0>
+define dso_local noundef <8 x i16> @test_v8i16_msb_set_bigendian() local_unnamed_addr {
+; POWERPC64-LE-10-LABEL: test_v8i16_msb_set_bigendian:
+; POWERPC64-LE-10: # %bb.0: # %entry
+; POWERPC64-LE-10-NEXT: plxv v2, .LCPI4_0 at PCREL(0), 1
+; POWERPC64-LE-10-NEXT: blr
+;
+; POWERPC64-BE-10-LABEL: test_v8i16_msb_set_bigendian:
+; POWERPC64-BE-10: # %bb.0: # %entry
+; POWERPC64-BE-10-NEXT: lxvkq v2, 16
+; POWERPC64-BE-10-NEXT: blr
+entry:
+ ret <8 x i16> <i16 -32768, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+}
+
+; Little-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <0, 0, 0, 0, 0, 0, 0, -32768>
+define dso_local noundef <8 x i16> @test_v8i16_msb_set_littleendian() local_unnamed_addr {
+; POWERPC64-LE-10-LABEL: test_v8i16_msb_set_littleendian:
+; POWERPC64-LE-10: # %bb.0: # %entry
+; POWERPC64-LE-10-NEXT: lxvkq v2, 16
+; POWERPC64-LE-10-NEXT: blr
+;
+; POWERPC64-BE-10-LABEL: test_v8i16_msb_set_littleendian:
+; POWERPC64-BE-10: # %bb.0: # %entry
+; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI5_0 at toc@ha
+; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI5_0 at toc@l
+; POWERPC64-BE-10-NEXT: lxv v2, 0(r3)
+; POWERPC64-BE-10-NEXT: blr
+entry:
+ ret <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 -32768>
+}
+
+; =============================================================================
+; v16i8 tests - MSB set pattern (0x8000_0000_0000_0000_0000_0000_0000_0000)
+; =============================================================================
+
+; Big-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <-128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>
+define dso_local noundef <16 x i8> @test_v16i8_msb_set_bigendian() local_unnamed_addr {
+; POWERPC64-LE-10-LABEL: test_v16i8_msb_set_bigendian:
+; POWERPC64-LE-10: # %bb.0: # %entry
+; POWERPC64-LE-10-NEXT: plxv v2, .LCPI6_0 at PCREL(0), 1
+; POWERPC64-LE-10-NEXT: blr
+;
+; POWERPC64-BE-10-LABEL: test_v16i8_msb_set_bigendian:
+; POWERPC64-BE-10: # %bb.0: # %entry
+; POWERPC64-BE-10-NEXT: lxvkq v2, 16
+; POWERPC64-BE-10-NEXT: blr
+entry:
+ ret <16 x i8> <i8 -128, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+}
+
+; Little-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128>
+define dso_local noundef <16 x i8> @test_v16i8_msb_set_littleendian() local_unnamed_addr {
+; POWERPC64-LE-10-LABEL: test_v16i8_msb_set_littleendian:
+; POWERPC64-LE-10: # %bb.0: # %entry
+; POWERPC64-LE-10-NEXT: lxvkq v2, 16
+; POWERPC64-LE-10-NEXT: blr
+;
+; POWERPC64-BE-10-LABEL: test_v16i8_msb_set_littleendian:
+; POWERPC64-BE-10: # %bb.0: # %entry
+; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI7_0 at toc@ha
+; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI7_0 at toc@l
+; POWERPC64-BE-10-NEXT: lxv v2, 0(r3)
+; POWERPC64-BE-10-NEXT: blr
+entry:
+ ret <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -128>
+}
+
+; =============================================================================
+; v2i64 tests - LSB set pattern (0x0000_0000_0000_0000_0000_0000_0000_0001)
+; =============================================================================
+
+; Big-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <0, 1>
+define dso_local noundef <2 x i64> @test_v2i64_lsb_set_bigendian() local_unnamed_addr {
+; POWERPC64-LE-10-LABEL: test_v2i64_lsb_set_bigendian:
+; POWERPC64-LE-10: # %bb.0: # %entry
+; POWERPC64-LE-10-NEXT: plxv v2, .LCPI8_0 at PCREL(0), 1
+; POWERPC64-LE-10-NEXT: blr
+;
+; POWERPC64-BE-10-LABEL: test_v2i64_lsb_set_bigendian:
+; POWERPC64-BE-10: # %bb.0: # %entry
+; POWERPC64-BE-10-NEXT: xxspltib v2, 255
+; POWERPC64-BE-10-NEXT: vsrq v2, v2, v2
+; POWERPC64-BE-10-NEXT: blr
+entry:
+ ret <2 x i64> <i64 0, i64 1>
+}
+
+; Little-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <1, 0>
+define dso_local noundef <2 x i64> @test_v2i64_lsb_set_littleendian() local_unnamed_addr {
+; POWERPC64-LE-10-LABEL: test_v2i64_lsb_set_littleendian:
+; POWERPC64-LE-10: # %bb.0: # %entry
+; POWERPC64-LE-10-NEXT: xxspltib v2, 255
+; POWERPC64-LE-10-NEXT: vsrq v2, v2, v2
+; POWERPC64-LE-10-NEXT: blr
+;
+; POWERPC64-BE-10-LABEL: test_v2i64_lsb_set_littleendian:
+; POWERPC64-BE-10: # %bb.0: # %entry
+; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI9_0 at toc@ha
+; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI9_0 at toc@l
+; POWERPC64-BE-10-NEXT: lxv v2, 0(r3)
+; POWERPC64-BE-10-NEXT: blr
+entry:
+ ret <2 x i64> <i64 1, i64 0>
+}
+
+; =============================================================================
+; v4i32 tests - LSB set pattern (0x0000_0000_0000_0000_0000_0000_0000_0001)
+; =============================================================================
+
+; Big-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <0, 0, 0, 1>
+define dso_local noundef <4 x i32> @test_v4i32_lsb_set_bigendian() local_unnamed_addr {
+; POWERPC64-LE-10-LABEL: test_v4i32_lsb_set_bigendian:
+; POWERPC64-LE-10: # %bb.0: # %entry
+; POWERPC64-LE-10-NEXT: plxv v2, .LCPI10_0 at PCREL(0), 1
+; POWERPC64-LE-10-NEXT: blr
+;
+; POWERPC64-BE-10-LABEL: test_v4i32_lsb_set_bigendian:
+; POWERPC64-BE-10: # %bb.0: # %entry
+; POWERPC64-BE-10-NEXT: xxspltib v2, 255
+; POWERPC64-BE-10-NEXT: vsrq v2, v2, v2
+; POWERPC64-BE-10-NEXT: blr
+entry:
+ ret <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+}
+
+; Little-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <1, 0, 0, 0>
+define dso_local noundef <4 x i32> @test_v4i32_lsb_set_littleendian() local_unnamed_addr {
+; POWERPC64-LE-10-LABEL: test_v4i32_lsb_set_littleendian:
+; POWERPC64-LE-10: # %bb.0: # %entry
+; POWERPC64-LE-10-NEXT: xxspltib v2, 255
+; POWERPC64-LE-10-NEXT: vsrq v2, v2, v2
+; POWERPC64-LE-10-NEXT: blr
+;
+; POWERPC64-BE-10-LABEL: test_v4i32_lsb_set_littleendian:
+; POWERPC64-BE-10: # %bb.0: # %entry
+; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI11_0 at toc@ha
+; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI11_0 at toc@l
+; POWERPC64-BE-10-NEXT: lxv v2, 0(r3)
+; POWERPC64-BE-10-NEXT: blr
+entry:
+ ret <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+}
+
+; =============================================================================
+; v8i16 tests - LSB set pattern (0x0000_0000_0000_0000_0000_0000_0000_0001)
+; =============================================================================
+
+; Big-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <0, 0, 0, 0, 0, 0, 0, 1>
+define dso_local noundef <8 x i16> @test_v8i16_lsb_set_bigendian() local_unnamed_addr {
+; POWERPC64-LE-10-LABEL: test_v8i16_lsb_set_bigendian:
+; POWERPC64-LE-10: # %bb.0: # %entry
+; POWERPC64-LE-10-NEXT: plxv v2, .LCPI12_0 at PCREL(0), 1
+; POWERPC64-LE-10-NEXT: blr
+;
+; POWERPC64-BE-10-LABEL: test_v8i16_lsb_set_bigendian:
+; POWERPC64-BE-10: # %bb.0: # %entry
+; POWERPC64-BE-10-NEXT: xxspltib v2, 255
+; POWERPC64-BE-10-NEXT: vsrq v2, v2, v2
+; POWERPC64-BE-10-NEXT: blr
+entry:
+ ret <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 1>
+}
+
+; Little-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <1, 0, 0, 0, 0, 0, 0, 0>
+define dso_local noundef <8 x i16> @test_v8i16_lsb_set_littleendian() local_unnamed_addr {
+; POWERPC64-LE-10-LABEL: test_v8i16_lsb_set_littleendian:
+; POWERPC64-LE-10: # %bb.0: # %entry
+; POWERPC64-LE-10-NEXT: xxspltib v2, 255
+; POWERPC64-LE-10-NEXT: vsrq v2, v2, v2
+; POWERPC64-LE-10-NEXT: blr
+;
+; POWERPC64-BE-10-LABEL: test_v8i16_lsb_set_littleendian:
+; POWERPC64-BE-10: # %bb.0: # %entry
+; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI13_0 at toc@ha
+; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI13_0 at toc@l
+; POWERPC64-BE-10-NEXT: lxv v2, 0(r3)
+; POWERPC64-BE-10-NEXT: blr
+entry:
+ ret <8 x i16> <i16 1, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+}
+
+; =============================================================================
+; v16i8 tests - LSB set pattern (0x0000_0000_0000_0000_0000_0000_0000_0001)
+; =============================================================================
+
+; Big-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1>
+define dso_local noundef <16 x i8> @test_v16i8_lsb_set_bigendian() local_unnamed_addr {
+; POWERPC64-LE-10-LABEL: test_v16i8_lsb_set_bigendian:
+; POWERPC64-LE-10: # %bb.0: # %entry
+; POWERPC64-LE-10-NEXT: plxv v2, .LCPI14_0 at PCREL(0), 1
+; POWERPC64-LE-10-NEXT: blr
+;
+; POWERPC64-BE-10-LABEL: test_v16i8_lsb_set_bigendian:
+; POWERPC64-BE-10: # %bb.0: # %entry
+; POWERPC64-BE-10-NEXT: xxspltib v2, 255
+; POWERPC64-BE-10-NEXT: vsrq v2, v2, v2
+; POWERPC64-BE-10-NEXT: blr
+entry:
+ ret <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 1>
+}
+
+; Little-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>
+define dso_local noundef <16 x i8> @test_v16i8_lsb_set_littleendian() local_unnamed_addr {
+; POWERPC64-LE-10-LABEL: test_v16i8_lsb_set_littleendian:
+; POWERPC64-LE-10: # %bb.0: # %entry
+; POWERPC64-LE-10-NEXT: xxspltib v2, 255
+; POWERPC64-LE-10-NEXT: vsrq v2, v2, v2
+; POWERPC64-LE-10-NEXT: blr
+;
+; POWERPC64-BE-10-LABEL: test_v16i8_lsb_set_littleendian:
+; POWERPC64-BE-10: # %bb.0: # %entry
+; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI15_0 at toc@ha
+; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI15_0 at toc@l
+; POWERPC64-BE-10-NEXT: lxv v2, 0(r3)
+; POWERPC64-BE-10-NEXT: blr
+entry:
+ ret <16 x i8> <i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll b/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll
index 0892210fc7443..d506d2062be5c 100644
--- a/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll
@@ -1566,12 +1566,16 @@ define dso_local i64 @v16i8tov16i64_sign(<16 x i8> %a) local_unnamed_addr #0 {
; PWR10BE-LABEL: v16i8tov16i64_sign:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: addis r3, r2, .LCPI23_0 at toc@ha
+; PWR10BE-NEXT: xxspltib v1, 255
; PWR10BE-NEXT: addi r3, r3, .LCPI23_0 at toc@l
+; PWR10BE-NEXT: vsrq v1, v1, v1
; PWR10BE-NEXT: lxv v3, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI23_1 at toc@ha
; PWR10BE-NEXT: addi r3, r3, .LCPI23_1 at toc@l
+; PWR10BE-NEXT: vperm v1, v2, v2, v1
; PWR10BE-NEXT: lxv v4, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI23_2 at toc@ha
+; PWR10BE-NEXT: vextsb2d v1, v1
; PWR10BE-NEXT: vperm v3, v2, v2, v3
; PWR10BE-NEXT: addi r3, r3, .LCPI23_2 at toc@l
; PWR10BE-NEXT: vextsb2d v3, v3
@@ -1585,23 +1589,18 @@ define dso_local i64 @v16i8tov16i64_sign(<16 x i8> %a) local_unnamed_addr #0 {
; PWR10BE-NEXT: vperm v5, v2, v2, v5
; PWR10BE-NEXT: addi r3, r3, .LCPI23_4 at toc@l
; PWR10BE-NEXT: vextsb2d v5, v5
-; PWR10BE-NEXT: lxv v1, 0(r3)
+; PWR10BE-NEXT: lxv v6, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI23_5 at toc@ha
; PWR10BE-NEXT: vperm v0, v2, v2, v0
; PWR10BE-NEXT: addi r3, r3, .LCPI23_5 at toc@l
; PWR10BE-NEXT: vextsb2d v0, v0
-; PWR10BE-NEXT: lxv v6, 0(r3)
+; PWR10BE-NEXT: lxv v7, 0(r3)
; PWR10BE-NEXT: addis r3, r2, .LCPI23_6 at toc@ha
-; PWR10BE-NEXT: vperm v1, v2, v2, v1
+; PWR10BE-NEXT: vperm v6, v2, v2, v6
; PWR10BE-NEXT: vaddudm v5, v0, v5
; PWR10BE-NEXT: vaddudm v3, v4, v3
; PWR10BE-NEXT: vaddudm v3, v3, v5
; PWR10BE-NEXT: addi r3, r3, .LCPI23_6 at toc@l
-; PWR10BE-NEXT: vextsb2d v1, v1
-; PWR10BE-NEXT: lxv v7, 0(r3)
-; PWR10BE-NEXT: addis r3, r2, .LCPI23_7 at toc@ha
-; PWR10BE-NEXT: vperm v6, v2, v2, v6
-; PWR10BE-NEXT: addi r3, r3, .LCPI23_7 at toc@l
; PWR10BE-NEXT: vextsb2d v6, v6
; PWR10BE-NEXT: lxv v8, 0(r3)
; PWR10BE-NEXT: vperm v7, v2, v2, v7
@@ -1609,7 +1608,7 @@ define dso_local i64 @v16i8tov16i64_sign(<16 x i8> %a) local_unnamed_addr #0 {
; PWR10BE-NEXT: vperm v2, v2, v2, v8
; PWR10BE-NEXT: vextsb2d v2, v2
; PWR10BE-NEXT: vaddudm v2, v2, v7
-; PWR10BE-NEXT: vaddudm v4, v6, v1
+; PWR10BE-NEXT: vaddudm v4, v1, v6
; PWR10BE-NEXT: vaddudm v2, v4, v2
; PWR10BE-NEXT: vaddudm v2, v2, v3
; PWR10BE-NEXT: xxswapd v3, v2
More information about the llvm-commits
mailing list