[llvm] [PowerPC] extend smaller splats into bigger splats (with fix) (PR #142194)
via llvm-commits
llvm-commits at lists.llvm.org
Fri May 30 11:31:01 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-powerpc
Author: None (RolandF77)
<details>
<summary>Changes</summary>
For pwr9, xxspltib is a byte splat with an immediate range of -128 to 127. Followed by a vector sign-extend instruction, it can be used to make splats of i16, i32, or i64 elements. For pwr8, vspltisw followed by a vector sign-extend instruction can be used to make splats of i64 elements in the range -16 to 15.
Add a check for P8 Altivec support to make sure the 64-bit vector ops are available.
---
Patch is 99.33 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/142194.diff
9 Files Affected:
- (modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+47-6)
- (modified) llvm/test/CodeGen/PowerPC/build-vector-tests.ll (+48-72)
- (modified) llvm/test/CodeGen/PowerPC/mul-const-vector.ll (+6-12)
- (modified) llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll (+2-3)
- (modified) llvm/test/CodeGen/PowerPC/pre-inc-disable.ll (+16-16)
- (added) llvm/test/CodeGen/PowerPC/splat-extend.ll (+50)
- (modified) llvm/test/CodeGen/PowerPC/vec_add_sub_doubleword.ll (+2-3)
- (modified) llvm/test/CodeGen/PowerPC/vector-extend-sign.ll (+2-3)
- (modified) llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll (+368-536)
``````````diff
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index c39b9d55cc212..b3348a4cb184b 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9664,7 +9664,25 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
}
}
- if (!BVNIsConstantSplat || SplatBitSize > 32) {
+ bool IsSplat64 = false;
+ uint64_t SplatBits = 0;
+ int32_t SextVal = 0;
+ if (BVNIsConstantSplat) {
+ if (SplatBitSize <= 32) {
+ SplatBits = APSplatBits.getZExtValue();
+ SextVal = SignExtend32(SplatBits, SplatBitSize);
+ } else if (SplatBitSize == 64 && Subtarget.hasP8Altivec()) {
+ int64_t Splat64Val = APSplatBits.getSExtValue();
+ SplatBits = (uint64_t)Splat64Val;
+ SextVal = (int32_t)SplatBits;
+ bool P9Vector = Subtarget.hasP9Vector();
+ int32_t Hi = P9Vector ? 127 : 15;
+ int32_t Lo = P9Vector ? -128 : -16;
+ IsSplat64 = Splat64Val >= Lo && Splat64Val <= Hi;
+ }
+ }
+
+ if (!BVNIsConstantSplat || (SplatBitSize > 32 && !IsSplat64)) {
unsigned NewOpcode = PPCISD::LD_SPLAT;
// Handle load-and-splat patterns as we have instructions that will do this
@@ -9750,7 +9768,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
return SDValue();
}
- uint64_t SplatBits = APSplatBits.getZExtValue();
uint64_t SplatUndef = APSplatUndef.getZExtValue();
unsigned SplatSize = SplatBitSize / 8;
@@ -9785,13 +9802,37 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
dl);
// If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
- int32_t SextVal = SignExtend32(SplatBits, SplatBitSize);
- if (SextVal >= -16 && SextVal <= 15)
- return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
- dl);
+ // Use VSPLTISW/VUPKLSW for v2i64 in range [-16,15].
+ if (SextVal >= -16 && SextVal <= 15) {
+ unsigned UseSize = SplatSize == 8 ? 4 : SplatSize;
+ SDValue Res =
+ getCanonicalConstSplat(SextVal, UseSize, Op.getValueType(), DAG, dl);
+ if (SplatSize != 8)
+ return Res;
+ return BuildIntrinsicOp(Intrinsic::ppc_altivec_vupklsw, Res, DAG, dl);
+ }
// Two instruction sequences.
+ if (Subtarget.hasP9Vector() && SextVal >= -128 && SextVal <= 127) {
+ SDValue C = DAG.getConstant((unsigned char)SextVal, dl, MVT::i32);
+ SmallVector<SDValue, 16> Ops(16, C);
+ SDValue BV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
+ assert((SplatSize == 2 || SplatSize == 4 || SplatSize == 8) &&
+ "Unexpected type for vector constant.");
+ unsigned IID;
+ if (SplatSize == 2) {
+ IID = Intrinsic::ppc_altivec_vupklsb;
+ } else if (SplatSize == 4) {
+ IID = Intrinsic::ppc_altivec_vextsb2w;
+ } else { // SplatSize == 8
+ IID = Intrinsic::ppc_altivec_vextsb2d;
+ }
+ SDValue Extend = BuildIntrinsicOp(IID, BV, DAG, dl);
+ return DAG.getBitcast(Op->getValueType(0), Extend);
+ }
+ assert(!IsSplat64 && "Unhandled 64-bit splat pattern");
+
// If this value is in the range [-32,30] and is even, use:
// VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
// If this value is in the range [17,31] and is odd, use:
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
index 91431ed15f6a7..9dd0fbe4474b1 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
@@ -3713,30 +3713,26 @@ entry:
define <2 x i64> @spltConst1ll() {
; P9BE-LABEL: spltConst1ll:
; P9BE: # %bb.0: # %entry
-; P9BE-NEXT: addis r3, r2, .LCPI65_0 at toc@ha
-; P9BE-NEXT: addi r3, r3, .LCPI65_0 at toc@l
-; P9BE-NEXT: lxv v2, 0(r3)
+; P9BE-NEXT: vspltisw v2, 1
+; P9BE-NEXT: vupklsw v2, v2
; P9BE-NEXT: blr
;
; P9LE-LABEL: spltConst1ll:
; P9LE: # %bb.0: # %entry
-; P9LE-NEXT: addis r3, r2, .LCPI65_0 at toc@ha
-; P9LE-NEXT: addi r3, r3, .LCPI65_0 at toc@l
-; P9LE-NEXT: lxv v2, 0(r3)
+; P9LE-NEXT: vspltisw v2, 1
+; P9LE-NEXT: vupklsw v2, v2
; P9LE-NEXT: blr
;
; P8BE-LABEL: spltConst1ll:
; P8BE: # %bb.0: # %entry
-; P8BE-NEXT: addis r3, r2, .LCPI65_0 at toc@ha
-; P8BE-NEXT: addi r3, r3, .LCPI65_0 at toc@l
-; P8BE-NEXT: lxvd2x v2, 0, r3
+; P8BE-NEXT: vspltisw v2, 1
+; P8BE-NEXT: vupklsw v2, v2
; P8BE-NEXT: blr
;
; P8LE-LABEL: spltConst1ll:
; P8LE: # %bb.0: # %entry
-; P8LE-NEXT: addis r3, r2, .LCPI65_0 at toc@ha
-; P8LE-NEXT: addi r3, r3, .LCPI65_0 at toc@l
-; P8LE-NEXT: lxvd2x v2, 0, r3
+; P8LE-NEXT: vspltisw v2, 1
+; P8LE-NEXT: vupklsw v2, v2
; P8LE-NEXT: blr
entry:
ret <2 x i64> <i64 1, i64 1>
@@ -4173,30 +4169,26 @@ entry:
define <2 x i64> @spltCnstConvftoll() {
; P9BE-LABEL: spltCnstConvftoll:
; P9BE: # %bb.0: # %entry
-; P9BE-NEXT: addis r3, r2, .LCPI78_0 at toc@ha
-; P9BE-NEXT: addi r3, r3, .LCPI78_0 at toc@l
-; P9BE-NEXT: lxv v2, 0(r3)
+; P9BE-NEXT: vspltisw v2, 4
+; P9BE-NEXT: vupklsw v2, v2
; P9BE-NEXT: blr
;
; P9LE-LABEL: spltCnstConvftoll:
; P9LE: # %bb.0: # %entry
-; P9LE-NEXT: addis r3, r2, .LCPI78_0 at toc@ha
-; P9LE-NEXT: addi r3, r3, .LCPI78_0 at toc@l
-; P9LE-NEXT: lxv v2, 0(r3)
+; P9LE-NEXT: vspltisw v2, 4
+; P9LE-NEXT: vupklsw v2, v2
; P9LE-NEXT: blr
;
; P8BE-LABEL: spltCnstConvftoll:
; P8BE: # %bb.0: # %entry
-; P8BE-NEXT: addis r3, r2, .LCPI78_0 at toc@ha
-; P8BE-NEXT: addi r3, r3, .LCPI78_0 at toc@l
-; P8BE-NEXT: lxvd2x v2, 0, r3
+; P8BE-NEXT: vspltisw v2, 4
+; P8BE-NEXT: vupklsw v2, v2
; P8BE-NEXT: blr
;
; P8LE-LABEL: spltCnstConvftoll:
; P8LE: # %bb.0: # %entry
-; P8LE-NEXT: addis r3, r2, .LCPI78_0 at toc@ha
-; P8LE-NEXT: addi r3, r3, .LCPI78_0 at toc@l
-; P8LE-NEXT: lxvd2x v2, 0, r3
+; P8LE-NEXT: vspltisw v2, 4
+; P8LE-NEXT: vupklsw v2, v2
; P8LE-NEXT: blr
entry:
ret <2 x i64> <i64 4, i64 4>
@@ -4526,30 +4518,26 @@ entry:
define <2 x i64> @spltCnstConvdtoll() {
; P9BE-LABEL: spltCnstConvdtoll:
; P9BE: # %bb.0: # %entry
-; P9BE-NEXT: addis r3, r2, .LCPI87_0 at toc@ha
-; P9BE-NEXT: addi r3, r3, .LCPI87_0 at toc@l
-; P9BE-NEXT: lxv v2, 0(r3)
+; P9BE-NEXT: vspltisw v2, 4
+; P9BE-NEXT: vupklsw v2, v2
; P9BE-NEXT: blr
;
; P9LE-LABEL: spltCnstConvdtoll:
; P9LE: # %bb.0: # %entry
-; P9LE-NEXT: addis r3, r2, .LCPI87_0 at toc@ha
-; P9LE-NEXT: addi r3, r3, .LCPI87_0 at toc@l
-; P9LE-NEXT: lxv v2, 0(r3)
+; P9LE-NEXT: vspltisw v2, 4
+; P9LE-NEXT: vupklsw v2, v2
; P9LE-NEXT: blr
;
; P8BE-LABEL: spltCnstConvdtoll:
; P8BE: # %bb.0: # %entry
-; P8BE-NEXT: addis r3, r2, .LCPI87_0 at toc@ha
-; P8BE-NEXT: addi r3, r3, .LCPI87_0 at toc@l
-; P8BE-NEXT: lxvd2x v2, 0, r3
+; P8BE-NEXT: vspltisw v2, 4
+; P8BE-NEXT: vupklsw v2, v2
; P8BE-NEXT: blr
;
; P8LE-LABEL: spltCnstConvdtoll:
; P8LE: # %bb.0: # %entry
-; P8LE-NEXT: addis r3, r2, .LCPI87_0 at toc@ha
-; P8LE-NEXT: addi r3, r3, .LCPI87_0 at toc@l
-; P8LE-NEXT: lxvd2x v2, 0, r3
+; P8LE-NEXT: vspltisw v2, 4
+; P8LE-NEXT: vupklsw v2, v2
; P8LE-NEXT: blr
entry:
ret <2 x i64> <i64 4, i64 4>
@@ -4879,30 +4867,26 @@ entry:
define <2 x i64> @spltConst1ull() {
; P9BE-LABEL: spltConst1ull:
; P9BE: # %bb.0: # %entry
-; P9BE-NEXT: addis r3, r2, .LCPI97_0 at toc@ha
-; P9BE-NEXT: addi r3, r3, .LCPI97_0 at toc@l
-; P9BE-NEXT: lxv v2, 0(r3)
+; P9BE-NEXT: vspltisw v2, 1
+; P9BE-NEXT: vupklsw v2, v2
; P9BE-NEXT: blr
;
; P9LE-LABEL: spltConst1ull:
; P9LE: # %bb.0: # %entry
-; P9LE-NEXT: addis r3, r2, .LCPI97_0 at toc@ha
-; P9LE-NEXT: addi r3, r3, .LCPI97_0 at toc@l
-; P9LE-NEXT: lxv v2, 0(r3)
+; P9LE-NEXT: vspltisw v2, 1
+; P9LE-NEXT: vupklsw v2, v2
; P9LE-NEXT: blr
;
; P8BE-LABEL: spltConst1ull:
; P8BE: # %bb.0: # %entry
-; P8BE-NEXT: addis r3, r2, .LCPI97_0 at toc@ha
-; P8BE-NEXT: addi r3, r3, .LCPI97_0 at toc@l
-; P8BE-NEXT: lxvd2x v2, 0, r3
+; P8BE-NEXT: vspltisw v2, 1
+; P8BE-NEXT: vupklsw v2, v2
; P8BE-NEXT: blr
;
; P8LE-LABEL: spltConst1ull:
; P8LE: # %bb.0: # %entry
-; P8LE-NEXT: addis r3, r2, .LCPI97_0 at toc@ha
-; P8LE-NEXT: addi r3, r3, .LCPI97_0 at toc@l
-; P8LE-NEXT: lxvd2x v2, 0, r3
+; P8LE-NEXT: vspltisw v2, 1
+; P8LE-NEXT: vupklsw v2, v2
; P8LE-NEXT: blr
entry:
ret <2 x i64> <i64 1, i64 1>
@@ -5339,30 +5323,26 @@ entry:
define <2 x i64> @spltCnstConvftoull() {
; P9BE-LABEL: spltCnstConvftoull:
; P9BE: # %bb.0: # %entry
-; P9BE-NEXT: addis r3, r2, .LCPI110_0 at toc@ha
-; P9BE-NEXT: addi r3, r3, .LCPI110_0 at toc@l
-; P9BE-NEXT: lxv v2, 0(r3)
+; P9BE-NEXT: vspltisw v2, 4
+; P9BE-NEXT: vupklsw v2, v2
; P9BE-NEXT: blr
;
; P9LE-LABEL: spltCnstConvftoull:
; P9LE: # %bb.0: # %entry
-; P9LE-NEXT: addis r3, r2, .LCPI110_0 at toc@ha
-; P9LE-NEXT: addi r3, r3, .LCPI110_0 at toc@l
-; P9LE-NEXT: lxv v2, 0(r3)
+; P9LE-NEXT: vspltisw v2, 4
+; P9LE-NEXT: vupklsw v2, v2
; P9LE-NEXT: blr
;
; P8BE-LABEL: spltCnstConvftoull:
; P8BE: # %bb.0: # %entry
-; P8BE-NEXT: addis r3, r2, .LCPI110_0 at toc@ha
-; P8BE-NEXT: addi r3, r3, .LCPI110_0 at toc@l
-; P8BE-NEXT: lxvd2x v2, 0, r3
+; P8BE-NEXT: vspltisw v2, 4
+; P8BE-NEXT: vupklsw v2, v2
; P8BE-NEXT: blr
;
; P8LE-LABEL: spltCnstConvftoull:
; P8LE: # %bb.0: # %entry
-; P8LE-NEXT: addis r3, r2, .LCPI110_0 at toc@ha
-; P8LE-NEXT: addi r3, r3, .LCPI110_0 at toc@l
-; P8LE-NEXT: lxvd2x v2, 0, r3
+; P8LE-NEXT: vspltisw v2, 4
+; P8LE-NEXT: vupklsw v2, v2
; P8LE-NEXT: blr
entry:
ret <2 x i64> <i64 4, i64 4>
@@ -5692,30 +5672,26 @@ entry:
define <2 x i64> @spltCnstConvdtoull() {
; P9BE-LABEL: spltCnstConvdtoull:
; P9BE: # %bb.0: # %entry
-; P9BE-NEXT: addis r3, r2, .LCPI119_0 at toc@ha
-; P9BE-NEXT: addi r3, r3, .LCPI119_0 at toc@l
-; P9BE-NEXT: lxv v2, 0(r3)
+; P9BE-NEXT: vspltisw v2, 4
+; P9BE-NEXT: vupklsw v2, v2
; P9BE-NEXT: blr
;
; P9LE-LABEL: spltCnstConvdtoull:
; P9LE: # %bb.0: # %entry
-; P9LE-NEXT: addis r3, r2, .LCPI119_0 at toc@ha
-; P9LE-NEXT: addi r3, r3, .LCPI119_0 at toc@l
-; P9LE-NEXT: lxv v2, 0(r3)
+; P9LE-NEXT: vspltisw v2, 4
+; P9LE-NEXT: vupklsw v2, v2
; P9LE-NEXT: blr
;
; P8BE-LABEL: spltCnstConvdtoull:
; P8BE: # %bb.0: # %entry
-; P8BE-NEXT: addis r3, r2, .LCPI119_0 at toc@ha
-; P8BE-NEXT: addi r3, r3, .LCPI119_0 at toc@l
-; P8BE-NEXT: lxvd2x v2, 0, r3
+; P8BE-NEXT: vspltisw v2, 4
+; P8BE-NEXT: vupklsw v2, v2
; P8BE-NEXT: blr
;
; P8LE-LABEL: spltCnstConvdtoull:
; P8LE: # %bb.0: # %entry
-; P8LE-NEXT: addis r3, r2, .LCPI119_0 at toc@ha
-; P8LE-NEXT: addi r3, r3, .LCPI119_0 at toc@l
-; P8LE-NEXT: lxvd2x v2, 0, r3
+; P8LE-NEXT: vspltisw v2, 4
+; P8LE-NEXT: vupklsw v2, v2
; P8LE-NEXT: blr
entry:
ret <2 x i64> <i64 4, i64 4>
diff --git a/llvm/test/CodeGen/PowerPC/mul-const-vector.ll b/llvm/test/CodeGen/PowerPC/mul-const-vector.ll
index e3d231adf734f..2d67de0d4477c 100644
--- a/llvm/test/CodeGen/PowerPC/mul-const-vector.ll
+++ b/llvm/test/CodeGen/PowerPC/mul-const-vector.ll
@@ -271,8 +271,7 @@ define <2 x i64> @test1_v2i64(<2 x i64> %a) {
ret <2 x i64> %tmp.1
}
; CHECK-LABEL: test1_v2i64:
-; CHECK-P8: lxvd2x v[[REG1:[0-9]+]], 0, r{{[0-9]+}}
-; CHECK-P9: lxv v[[REG2:[0-9]+]], 0(r{{[0-9]+}})
+; CHECK: vupklsw v[[REG1:[0-9]+]], v{{[0-9]+}}
; CHECK-NOT: vmul
; CHECK-NEXT: vsld v{{[0-9]+}}, v2, v[[REG2]]
@@ -282,8 +281,7 @@ define <2 x i64> @test2_v2i64(<2 x i64> %a) {
}
; CHECK-LABEL: test2_v2i64:
-; CHECK-P8: lxvd2x v[[REG1:[0-9]+]], 0, r{{[0-9]+}}
-; CHECK-P9: lxv v[[REG2:[0-9]+]], 0(r{{[0-9]+}})
+; CHECK: vupklsw v[[REG1:[0-9]+]], v{{[0-9]+}}
; CHECK-NOT: vmul
; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
; CHECK-NEXT: vaddudm v{{[0-9]+}}, v2, v[[REG3]]
@@ -294,8 +292,7 @@ define <2 x i64> @test3_v2i64(<2 x i64> %a) {
}
; CHECK-LABEL: test3_v2i64:
-; CHECK-P8: lxvd2x v[[REG1:[0-9]+]], 0, r{{[0-9]+}}
-; CHECK-P9: lxv v[[REG2:[0-9]+]], 0(r{{[0-9]+}})
+; CHECK: vupklsw v[[REG1:[0-9]+]], v{{[0-9]+}}
; CHECK-NOT: vmul
; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
; CHECK-NEXT: vsubudm v{{[0-9]+}}, v[[REG3]], v2
@@ -308,8 +305,7 @@ define <2 x i64> @test4_v2i64(<2 x i64> %a) {
}
; CHECK-LABEL: test4_v2i64:
-; CHECK-P8: lxvd2x v[[REG1:[0-9]+]], 0, r{{[0-9]+}}
-; CHECK-P9: lxv v[[REG2:[0-9]+]], 0(r{{[0-9]+}})
+; CHECK: vupklsw v[[REG1:[0-9]+]], v{{[0-9]+}}
; CHECK-NOT: vmul
; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
; CHECK-P8-NEXT: xxlxor v[[REG4:[0-9]+]],
@@ -322,8 +318,7 @@ define <2 x i64> @test5_v2i64(<2 x i64> %a) {
}
; CHECK-LABEL: test5_v2i64:
-; CHECK-P8: lxvd2x v[[REG1:[0-9]+]], 0, r{{[0-9]+}}
-; CHECK-P9: lxv v[[REG2:[0-9]+]], 0(r{{[0-9]+}})
+; CHECK: vupklsw v[[REG1:[0-9]+]], v{{[0-9]+}}
; CHECK-NOT: vmul
; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
; CHECK-NEXT: vaddudm v[[REG4:[0-9]+]], v2, v[[REG3]]
@@ -337,8 +332,7 @@ define <2 x i64> @test6_v2i64(<2 x i64> %a) {
}
; CHECK-LABEL: test6_v2i64:
-; CHECK-P8: lxvd2x v[[REG1:[0-9]+]], 0, r{{[0-9]+}}
-; CHECK-P9: lxv v[[REG2:[0-9]+]], 0(r{{[0-9]+}})
+; CHECK: vupklsw v[[REG1:[0-9]+]], v{{[0-9]+}}
; CHECK-NOT: vmul
; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
; CHECK-NEXT: vsubudm v{{[0-9]+}}, v2, v[[REG3]]
diff --git a/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll b/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
index 842cb929541cf..1ab74e6cb1cee 100644
--- a/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
@@ -105,9 +105,8 @@ define dso_local <2 x double> @testDoubleToDoubleNaNFail() local_unnamed_addr {
;
; CHECK-NOPREFIX-LABEL: testDoubleToDoubleNaNFail:
; CHECK-NOPREFIX: # %bb.0: # %entry
-; CHECK-NOPREFIX-NEXT: addis r3, r2, .LCPI2_0 at toc@ha
-; CHECK-NOPREFIX-NEXT: addi r3, r3, .LCPI2_0 at toc@l
-; CHECK-NOPREFIX-NEXT: lxv vs34, 0(r3)
+; CHECK-NOPREFIX-NEXT: vspltisw v2, -16
+; CHECK-NOPREFIX-NEXT: vupklsw v2, v2
; CHECK-NOPREFIX-NEXT: blr
;
; CHECK-BE-LABEL: testDoubleToDoubleNaNFail:
diff --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
index 4435484ae0b94..6b29c780de600 100644
--- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
+++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
@@ -22,10 +22,10 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9LE-NEXT: lfdx 0, 3, 4
; P9LE-NEXT: addis 3, 2, .LCPI0_0 at toc@ha
; P9LE-NEXT: xxlxor 2, 2, 2
-; P9LE-NEXT: vspltisw 4, 8
+; P9LE-NEXT: xxspltib 4, 16
; P9LE-NEXT: lxsd 3, 4(5)
; P9LE-NEXT: addi 3, 3, .LCPI0_0 at toc@l
-; P9LE-NEXT: vadduwm 4, 4, 4
+; P9LE-NEXT: vextsb2w 4, 4
; P9LE-NEXT: lxv 1, 0(3)
; P9LE-NEXT: addis 3, 2, .LCPI0_1 at toc@ha
; P9LE-NEXT: addi 3, 3, .LCPI0_1 at toc@l
@@ -45,10 +45,10 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-NEXT: lxsdx 2, 3, 4
; P9BE-NEXT: addis 3, 2, .LCPI0_0 at toc@ha
; P9BE-NEXT: xxlxor 1, 1, 1
-; P9BE-NEXT: vspltisw 4, 8
+; P9BE-NEXT: xxspltib 4, 16
; P9BE-NEXT: lxsd 3, 4(5)
; P9BE-NEXT: addi 3, 3, .LCPI0_0 at toc@l
-; P9BE-NEXT: vadduwm 4, 4, 4
+; P9BE-NEXT: vextsb2w 4, 4
; P9BE-NEXT: lxv 0, 0(3)
; P9BE-NEXT: addis 3, 2, .LCPI0_1 at toc@ha
; P9BE-NEXT: addi 3, 3, .LCPI0_1 at toc@l
@@ -68,11 +68,11 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-AIX-NEXT: lxsdx 2, 3, 4
; P9BE-AIX-NEXT: ld 3, L..C0(2) # %const.0
; P9BE-AIX-NEXT: xxlxor 1, 1, 1
-; P9BE-AIX-NEXT: vspltisw 4, 8
+; P9BE-AIX-NEXT: xxspltib 4, 16
; P9BE-AIX-NEXT: lxsd 3, 4(5)
; P9BE-AIX-NEXT: lxv 0, 0(3)
; P9BE-AIX-NEXT: ld 3, L..C1(2) # %const.1
-; P9BE-AIX-NEXT: vadduwm 4, 4, 4
+; P9BE-AIX-NEXT: vextsb2w 4, 4
; P9BE-AIX-NEXT: xxperm 2, 1, 0
; P9BE-AIX-NEXT: lxv 0, 0(3)
; P9BE-AIX-NEXT: xxperm 3, 3, 0
@@ -89,10 +89,10 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-AIX32-NEXT: lxvwsx 0, 3, 4
; P9BE-AIX32-NEXT: li 3, 4
; P9BE-AIX32-NEXT: xxlxor 2, 2, 2
-; P9BE-AIX32-NEXT: vspltisw 4, 8
+; P9BE-AIX32-NEXT: xxspltib 4, 16
; P9BE-AIX32-NEXT: lxvwsx 1, 5, 3
; P9BE-AIX32-NEXT: lwz 3, L..C0(2) # %const.0
-; P9BE-AIX32-NEXT: vadduwm 4, 4, 4
+; P9BE-AIX32-NEXT: vextsb2w 4, 4
; P9BE-AIX32-NEXT: xxmrghw 2, 0, 1
; P9BE-AIX32-NEXT: lxv 0, 0(3)
; P9BE-AIX32-NEXT: li 3, 8
@@ -137,11 +137,11 @@ define void @test32(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9LE-NEXT: lxsiwzx 2, 3, 4
; P9LE-NEXT: addis 3, 2, .LCPI1_0 at toc@ha
; P9LE-NEXT: xxlxor 0, 0, 0
-; P9LE-NEXT: vspltisw 4, 8
+; P9LE-NEXT: xxspltib 4, 16
; P9LE-NEXT: addi 3, 3, .LCPI1_0 at toc@l
; P9LE-NEXT: lxv 1, 0(3)
; P9LE-NEXT: li 3, 4
-; P9LE-NEXT: vadduwm 4, 4, 4
+; P9LE-NEXT: vextsb2w 4, 4
; P9LE-NEXT: lxsiwzx 3, 5, 3
; P9LE-NEXT: xxperm 2, 0, 1
; P9LE-NEXT: xxperm 3, 0, 1
@@ -158,11 +158,11 @@ define void @test32(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-NEXT: lxsiwzx 2, 3, 4
; P9BE-NEXT: addis 3, 2, .LCPI1_0 at toc@ha
; P9BE-NEXT: xxlxor 0, 0, 0
-; P9BE-NEXT: vspltisw 4, 8
+; P9BE-NEXT: xxspltib 4, 16
; P9BE-NEXT: addi 3, 3, .LCPI1_0 at toc@l
; P9BE-NEXT: lxv 1, 0(3)
; P9BE-NEXT: li 3, 4
-; P9BE-NEXT: vadduwm 4, 4, 4
+; P9BE-NEXT: vextsb2w 4, 4
; P9BE-NEXT: lxsiwzx 3, 5, 3
; P9BE-NEXT: xxperm 2, 0, 1
; P9BE-NEXT: xxperm 3, 0, 1
@@ -179,10 +179,10 @@ define void @test32(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-AIX-NEXT: lxsiwzx 2, 3, 4
; P9BE-AIX-NEXT: ld 3, L..C2(2) # %const.0
; P9BE-AIX-NEXT: xxlxor 0, 0, 0
-; P9BE-AIX-NEXT: vspltisw 4, 8
+; P9BE-AIX-NEXT: xxspltib 4, 16
; P9BE-AIX-NEXT: lxv 1, 0(3)
; P9BE-AIX-NEXT: li 3, 4
-; P9BE-AIX-NEXT: vadduwm 4, 4, 4
+; P9BE-AIX-NEXT: vextsb2w 4, 4
; P9BE-AIX-NEXT: lxsiwzx 3, 5, 3
; P9BE-AIX-NEXT: xxperm 2, 0, 1
; P9BE-AIX-NEXT: xxperm 3, 0, 1
@@ -199,10 +199,10 @@ define void @test32(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-AIX32-NEXT: lxsiwzx 2, 3, 4
; P9BE-AIX32-NEXT: lwz 3, L..C2(2) # %const.0
; P9BE-AIX32-NEXT: xxlxor 0, 0, 0
-; P9BE-AIX32-NEXT: vspltisw 4, 8
+; P9BE-AIX32-NEXT: xxspltib 4, 16
; P9BE-AIX32-NEXT: lxv 1, 0(3)
; P9BE-AIX32-NEXT: li 3, 4
-; P9BE-AIX32-NEXT: vadduwm 4, 4, 4
+; P9BE-AIX32-NEXT: vextsb2w 4, 4
; P9BE-AIX32-NEXT: lxsiwzx 3, 5, 3
; P9BE-AIX32-NEXT: xxperm 2, 0, 1
; P9BE-AIX32-NEXT: xxperm 3, 0, 1
diff --git a/llvm/test/CodeGen/PowerPC/splat-extend.ll b/llvm/test/CodeGen/PowerPC/splat-extend.ll
new file mode 100644
index 0000000000000..4be55468a25dc
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/splat-extend.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-aix-xcoff \
+; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-aix-xcoff \
+; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s
+
+define dso_local noundef <8 x i16> @v103s() local_unnamed_addr #0 {
+; CHECK-LABEL: v103s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xxspltib v2, 103
+; CHECK-NEXT: vupklsb v2, v2
+; CHECK-NEXT: blr
+entry:
+ ret <8 x i16> splat (i16 103)
+}
+
+define dso_local noundef <2 x i64> @v103l() local_unnamed_addr #0 {
+; CHECK-LABEL: v103l:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xxspltib v2, 103
+; CHECK-NEXT: vextsb2d v2, v2
+; CHECK-NEXT: blr
+entry:
+ ret <2 x i64> splat (i64 103)
+}
+
+define dso_local noundef <4 x i32> @v103i() local_unnamed_addr #0 {
+; CHECK-LABEL: v103i:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xxspltib v2, 103
+; CHECK-NEXT: vextsb2w v2, v2
+; CHECK-NEXT: blr
+entry:
+ ret <4 x i32> splat (i32 103)
+}
+
+define dso_local noundef <2 x i64> @v11l() local_unnamed_addr #0 {
+; CHECK-LABEL: v11l:
+; CHECK: # %bb...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/142194
More information about the llvm-commits
mailing list