[llvm] fc59f2c - [PowerPC] special case small int constant for custom scalar_to_vector (#109850)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 21 09:19:11 PDT 2024
Author: RolandF77
Date: 2024-10-21T12:19:07-04:00
New Revision: fc59f2cc0f191bb7a0706dfb65e3e46fef69f466
URL: https://github.com/llvm/llvm-project/commit/fc59f2cc0f191bb7a0706dfb65e3e46fef69f466
DIFF: https://github.com/llvm/llvm-project/commit/fc59f2cc0f191bb7a0706dfb65e3e46fef69f466.diff
LOG: [PowerPC] special case small int constant for custom scalar_to_vector (#109850)
Special case small int constant in the PPC custom lowering of
scalar_to_vector.
Added:
llvm/test/CodeGen/PowerPC/const-stov.ll
Modified:
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
llvm/test/CodeGen/PowerPC/load-and-splat.ll
llvm/test/CodeGen/PowerPC/p10-splatImm32-undef.ll
llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index cb0c8bade67012..7199fac9b110b6 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11595,6 +11595,15 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
SDValue Op0 = Op.getOperand(0);
+ EVT ValVT = Op0.getValueType();
+ unsigned EltSize = Op.getValueType().getScalarSizeInBits();
+ if (isa<ConstantSDNode>(Op0) && EltSize <= 32) {
+ int64_t IntVal = Op.getConstantOperandVal(0);
+ if (IntVal >= -16 && IntVal <= 15)
+ return getCanonicalConstSplat(IntVal, EltSize / 8, Op.getValueType(), DAG,
+ dl);
+ }
+
ReuseLoadInfo RLI;
if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
@@ -11619,7 +11628,6 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
SDValue Val = Op0;
- EVT ValVT = Val.getValueType();
// P10 hardware store forwarding requires that a single store contains all
// the data for the load. P10 is able to merge a pair of adjacent stores. Try
// to avoid load hit store on P10 when running binaries compiled for older
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
index fba6725e2b2a3f..2259b6e0f44df6 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
@@ -26,18 +26,14 @@ define <2 x i64> @build_v2i64_extload_0(ptr nocapture noundef readonly %p) {
;
; PWR7-LE-LABEL: build_v2i64_extload_0:
; PWR7-LE: # %bb.0: # %entry
-; PWR7-LE-NEXT: li 4, 0
-; PWR7-LE-NEXT: stw 4, -16(1)
-; PWR7-LE-NEXT: addis 4, 2, .LCPI0_0@toc@ha
; PWR7-LE-NEXT: lfiwzx 0, 0, 3
-; PWR7-LE-NEXT: addi 3, 1, -16
-; PWR7-LE-NEXT: addi 4, 4, .LCPI0_0@toc@l
-; PWR7-LE-NEXT: lxvd2x 1, 0, 4
-; PWR7-LE-NEXT: xxspltw 35, 0, 1
+; PWR7-LE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
+; PWR7-LE-NEXT: xxlxor 36, 36, 36
+; PWR7-LE-NEXT: addi 3, 3, .LCPI0_0@toc@l
+; PWR7-LE-NEXT: xxspltw 34, 0, 1
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
-; PWR7-LE-NEXT: xxswapd 34, 1
-; PWR7-LE-NEXT: xxswapd 36, 0
-; PWR7-LE-NEXT: vperm 2, 4, 3, 2
+; PWR7-LE-NEXT: xxswapd 35, 0
+; PWR7-LE-NEXT: vperm 2, 4, 2, 3
; PWR7-LE-NEXT: blr
;
; PWR8-LE-LABEL: build_v2i64_extload_0:
@@ -357,18 +353,14 @@ define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) {
;
; PWR7-LE-LABEL: build_v4i32_load_0:
; PWR7-LE: # %bb.0: # %entry
-; PWR7-LE-NEXT: li 4, 0
-; PWR7-LE-NEXT: stw 4, -16(1)
-; PWR7-LE-NEXT: addis 4, 2, .LCPI8_0@toc@ha
; PWR7-LE-NEXT: lfiwzx 0, 0, 3
-; PWR7-LE-NEXT: addi 3, 1, -16
-; PWR7-LE-NEXT: addi 4, 4, .LCPI8_0@toc@l
-; PWR7-LE-NEXT: lxvd2x 1, 0, 4
-; PWR7-LE-NEXT: xxspltw 35, 0, 1
+; PWR7-LE-NEXT: addis 3, 2, .LCPI8_0@toc@ha
+; PWR7-LE-NEXT: xxlxor 36, 36, 36
+; PWR7-LE-NEXT: addi 3, 3, .LCPI8_0@toc@l
+; PWR7-LE-NEXT: xxspltw 34, 0, 1
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
-; PWR7-LE-NEXT: xxswapd 34, 1
-; PWR7-LE-NEXT: xxswapd 36, 0
-; PWR7-LE-NEXT: vperm 2, 4, 3, 2
+; PWR7-LE-NEXT: xxswapd 35, 0
+; PWR7-LE-NEXT: vperm 2, 4, 2, 3
; PWR7-LE-NEXT: blr
;
; PWR8-LE-LABEL: build_v4i32_load_0:
@@ -412,18 +404,14 @@ define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) {
;
; PWR7-LE-LABEL: build_v4i32_load_1:
; PWR7-LE: # %bb.0: # %entry
-; PWR7-LE-NEXT: li 4, 0
-; PWR7-LE-NEXT: stw 4, -16(1)
-; PWR7-LE-NEXT: addis 4, 2, .LCPI9_0@toc@ha
; PWR7-LE-NEXT: lfiwzx 0, 0, 3
-; PWR7-LE-NEXT: addi 3, 1, -16
-; PWR7-LE-NEXT: addi 4, 4, .LCPI9_0@toc@l
-; PWR7-LE-NEXT: lxvd2x 1, 0, 4
-; PWR7-LE-NEXT: xxspltw 35, 0, 1
+; PWR7-LE-NEXT: addis 3, 2, .LCPI9_0@toc@ha
+; PWR7-LE-NEXT: xxlxor 36, 36, 36
+; PWR7-LE-NEXT: addi 3, 3, .LCPI9_0@toc@l
+; PWR7-LE-NEXT: xxspltw 34, 0, 1
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
-; PWR7-LE-NEXT: xxswapd 34, 1
-; PWR7-LE-NEXT: xxswapd 36, 0
-; PWR7-LE-NEXT: vperm 2, 3, 4, 2
+; PWR7-LE-NEXT: xxswapd 35, 0
+; PWR7-LE-NEXT: vperm 2, 2, 4, 3
; PWR7-LE-NEXT: blr
;
; PWR8-LE-LABEL: build_v4i32_load_1:
@@ -469,18 +457,14 @@ define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) {
;
; PWR7-LE-LABEL: build_v4i32_load_2:
; PWR7-LE: # %bb.0: # %entry
-; PWR7-LE-NEXT: li 4, 0
-; PWR7-LE-NEXT: stw 4, -16(1)
-; PWR7-LE-NEXT: addis 4, 2, .LCPI10_0@toc@ha
; PWR7-LE-NEXT: lfiwzx 0, 0, 3
-; PWR7-LE-NEXT: addi 3, 1, -16
-; PWR7-LE-NEXT: addi 4, 4, .LCPI10_0@toc@l
-; PWR7-LE-NEXT: lxvd2x 1, 0, 4
-; PWR7-LE-NEXT: xxspltw 35, 0, 1
+; PWR7-LE-NEXT: addis 3, 2, .LCPI10_0@toc@ha
+; PWR7-LE-NEXT: xxlxor 36, 36, 36
+; PWR7-LE-NEXT: addi 3, 3, .LCPI10_0@toc@l
+; PWR7-LE-NEXT: xxspltw 34, 0, 1
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
-; PWR7-LE-NEXT: xxswapd 34, 1
-; PWR7-LE-NEXT: xxswapd 36, 0
-; PWR7-LE-NEXT: vperm 2, 3, 4, 2
+; PWR7-LE-NEXT: xxswapd 35, 0
+; PWR7-LE-NEXT: vperm 2, 2, 4, 3
; PWR7-LE-NEXT: blr
;
; PWR8-LE-LABEL: build_v4i32_load_2:
@@ -524,18 +508,14 @@ define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) {
;
; PWR7-LE-LABEL: build_v4i32_load_3:
; PWR7-LE: # %bb.0: # %entry
-; PWR7-LE-NEXT: li 4, 0
-; PWR7-LE-NEXT: stw 4, -16(1)
-; PWR7-LE-NEXT: addis 4, 2, .LCPI11_0@toc@ha
; PWR7-LE-NEXT: lfiwzx 0, 0, 3
-; PWR7-LE-NEXT: addi 3, 1, -16
-; PWR7-LE-NEXT: addi 4, 4, .LCPI11_0@toc@l
-; PWR7-LE-NEXT: lxvd2x 1, 0, 4
-; PWR7-LE-NEXT: xxspltw 35, 0, 1
+; PWR7-LE-NEXT: addis 3, 2, .LCPI11_0@toc@ha
+; PWR7-LE-NEXT: xxlxor 36, 36, 36
+; PWR7-LE-NEXT: addi 3, 3, .LCPI11_0@toc@l
+; PWR7-LE-NEXT: xxspltw 34, 0, 1
; PWR7-LE-NEXT: lxvd2x 0, 0, 3
-; PWR7-LE-NEXT: xxswapd 34, 1
-; PWR7-LE-NEXT: xxswapd 36, 0
-; PWR7-LE-NEXT: vperm 2, 3, 4, 2
+; PWR7-LE-NEXT: xxswapd 35, 0
+; PWR7-LE-NEXT: vperm 2, 2, 4, 3
; PWR7-LE-NEXT: blr
;
; PWR8-LE-LABEL: build_v4i32_load_3:
diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
index e1159e56e23ebe..7f6fdc7f88cd11 100644
--- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
+++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
@@ -849,16 +849,12 @@ define dso_local <16 x i8> @no_RAUW_in_combine_during_legalize(ptr nocapture rea
;
; P8-AIX-32-LABEL: no_RAUW_in_combine_during_legalize:
; P8-AIX-32: # %bb.0: # %entry
-; P8-AIX-32-NEXT: li r5, 0
; P8-AIX-32-NEXT: slwi r4, r4, 2
-; P8-AIX-32-NEXT: xxlxor v3, v3, v3
-; P8-AIX-32-NEXT: stw r5, -16(r1)
+; P8-AIX-32-NEXT: xxlxor v2, v2, v2
; P8-AIX-32-NEXT: lfiwzx f0, r3, r4
-; P8-AIX-32-NEXT: addi r3, r1, -16
-; P8-AIX-32-NEXT: lxvw4x vs1, 0, r3
; P8-AIX-32-NEXT: xxspltw vs0, vs0, 1
-; P8-AIX-32-NEXT: xxmrghw v2, vs1, vs0
-; P8-AIX-32-NEXT: vmrghb v2, v2, v3
+; P8-AIX-32-NEXT: xxmrghw v3, v2, vs0
+; P8-AIX-32-NEXT: vmrghb v2, v3, v2
; P8-AIX-32-NEXT: blr
entry:
%idx.ext = sext i32 %offset to i64
diff --git a/llvm/test/CodeGen/PowerPC/const-stov.ll b/llvm/test/CodeGen/PowerPC/const-stov.ll
new file mode 100644
index 00000000000000..69c68a4f27371e
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/const-stov.ll
@@ -0,0 +1,164 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs \
+; RUN: -mtriple=powerpc64-- -mcpu=pwr7 < %s | FileCheck \
+; RUN: --check-prefix=PWR7-BE %s
+; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs \
+; RUN: -mtriple=powerpc64-- -mcpu=pwr8 < %s | FileCheck \
+; RUN: --check-prefix=PWR8-BE %s
+; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs \
+; RUN: -mtriple=powerpc64le-- -mcpu=pwr8 < %s | FileCheck \
+; RUN: --check-prefix=PWR8-LE %s
+
+define <16 x i8> @i8(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: i8:
+; PWR7-BE: # %bb.0: # %entry
+; PWR7-BE-NEXT: lxvw4x v3, 0, r3
+; PWR7-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha
+; PWR7-BE-NEXT: vspltisb v2, 10
+; PWR7-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l
+; PWR7-BE-NEXT: lxvw4x v4, 0, r3
+; PWR7-BE-NEXT: vperm v2, v3, v2, v4
+; PWR7-BE-NEXT: blr
+;
+; PWR8-BE-LABEL: i8:
+; PWR8-BE: # %bb.0: # %entry
+; PWR8-BE-NEXT: lxvw4x v2, 0, r3
+; PWR8-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha
+; PWR8-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l
+; PWR8-BE-NEXT: lxvw4x v3, 0, r3
+; PWR8-BE-NEXT: li r3, 10
+; PWR8-BE-NEXT: mtvsrwz v4, r3
+; PWR8-BE-NEXT: vperm v2, v2, v4, v3
+; PWR8-BE-NEXT: blr
+;
+; PWR8-LE-LABEL: i8:
+; PWR8-LE: # %bb.0: # %entry
+; PWR8-LE-NEXT: lxvd2x vs0, 0, r3
+; PWR8-LE-NEXT: addis r3, r2, .LCPI0_0@toc@ha
+; PWR8-LE-NEXT: addi r3, r3, .LCPI0_0@toc@l
+; PWR8-LE-NEXT: xxswapd v2, vs0
+; PWR8-LE-NEXT: lxvd2x vs0, 0, r3
+; PWR8-LE-NEXT: li r3, 10
+; PWR8-LE-NEXT: mtvsrd v4, r3
+; PWR8-LE-NEXT: xxswapd v3, vs0
+; PWR8-LE-NEXT: vperm v2, v4, v2, v3
+; PWR8-LE-NEXT: blr
+entry:
+ %0 = load <16 x i8>, ptr %p, align 16
+ %vecinit1 = insertelement <16 x i8> %0, i8 10, i64 1
+ ret <16 x i8> %vecinit1
+}
+
+define <8 x i16> @i16(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: i16:
+; PWR7-BE: # %bb.0: # %entry
+; PWR7-BE-NEXT: lxvw4x v3, 0, r3
+; PWR7-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha
+; PWR7-BE-NEXT: vspltish v2, 9
+; PWR7-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l
+; PWR7-BE-NEXT: lxvw4x v4, 0, r3
+; PWR7-BE-NEXT: vperm v2, v3, v2, v4
+; PWR7-BE-NEXT: blr
+;
+; PWR8-BE-LABEL: i16:
+; PWR8-BE: # %bb.0: # %entry
+; PWR8-BE-NEXT: lxvw4x v2, 0, r3
+; PWR8-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha
+; PWR8-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l
+; PWR8-BE-NEXT: lxvw4x v3, 0, r3
+; PWR8-BE-NEXT: li r3, 9
+; PWR8-BE-NEXT: mtvsrwz v4, r3
+; PWR8-BE-NEXT: vperm v2, v2, v4, v3
+; PWR8-BE-NEXT: blr
+;
+; PWR8-LE-LABEL: i16:
+; PWR8-LE: # %bb.0: # %entry
+; PWR8-LE-NEXT: lxvd2x vs0, 0, r3
+; PWR8-LE-NEXT: addis r3, r2, .LCPI1_0@toc@ha
+; PWR8-LE-NEXT: addi r3, r3, .LCPI1_0@toc@l
+; PWR8-LE-NEXT: xxswapd v2, vs0
+; PWR8-LE-NEXT: lxvd2x vs0, 0, r3
+; PWR8-LE-NEXT: li r3, 9
+; PWR8-LE-NEXT: mtvsrd v4, r3
+; PWR8-LE-NEXT: xxswapd v3, vs0
+; PWR8-LE-NEXT: vperm v2, v4, v2, v3
+; PWR8-LE-NEXT: blr
+entry:
+ %0 = load <8 x i16>, ptr %p, align 16
+ %vecinit1 = insertelement <8 x i16> %0, i16 9, i64 1
+ ret <8 x i16> %vecinit1
+}
+
+define <4 x i32> @i32(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: i32:
+; PWR7-BE: # %bb.0: # %entry
+; PWR7-BE-NEXT: lxvw4x v3, 0, r3
+; PWR7-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha
+; PWR7-BE-NEXT: vspltisw v2, 7
+; PWR7-BE-NEXT: addi r3, r3, .LCPI2_0@toc@l
+; PWR7-BE-NEXT: lxvw4x v4, 0, r3
+; PWR7-BE-NEXT: vperm v2, v3, v2, v4
+; PWR7-BE-NEXT: blr
+;
+; PWR8-BE-LABEL: i32:
+; PWR8-BE: # %bb.0: # %entry
+; PWR8-BE-NEXT: lxvw4x v2, 0, r3
+; PWR8-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha
+; PWR8-BE-NEXT: addi r3, r3, .LCPI2_0@toc@l
+; PWR8-BE-NEXT: lxvw4x v3, 0, r3
+; PWR8-BE-NEXT: li r3, 7
+; PWR8-BE-NEXT: mtvsrwz v4, r3
+; PWR8-BE-NEXT: vperm v2, v2, v4, v3
+; PWR8-BE-NEXT: blr
+;
+; PWR8-LE-LABEL: i32:
+; PWR8-LE: # %bb.0: # %entry
+; PWR8-LE-NEXT: lxvd2x vs0, 0, r3
+; PWR8-LE-NEXT: addis r3, r2, .LCPI2_0@toc@ha
+; PWR8-LE-NEXT: addi r3, r3, .LCPI2_0@toc@l
+; PWR8-LE-NEXT: xxswapd v2, vs0
+; PWR8-LE-NEXT: lxvd2x vs0, 0, r3
+; PWR8-LE-NEXT: li r3, 7
+; PWR8-LE-NEXT: mtvsrwz v4, r3
+; PWR8-LE-NEXT: xxswapd v3, vs0
+; PWR8-LE-NEXT: vperm v2, v4, v2, v3
+; PWR8-LE-NEXT: blr
+entry:
+ %0 = load <4 x i32>, ptr %p, align 16
+ %vecinit1 = insertelement <4 x i32> %0, i32 7, i64 1
+ ret <4 x i32> %vecinit1
+}
+
+define <2 x i64> @i64(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: i64:
+; PWR7-BE: # %bb.0: # %entry
+; PWR7-BE-NEXT: lxvd2x v2, 0, r3
+; PWR7-BE-NEXT: li r3, 10
+; PWR7-BE-NEXT: std r3, -16(r1)
+; PWR7-BE-NEXT: std r3, -8(r1)
+; PWR7-BE-NEXT: addi r3, r1, -16
+; PWR7-BE-NEXT: lxvd2x v3, 0, r3
+; PWR7-BE-NEXT: xxmrghd v2, v2, v3
+; PWR7-BE-NEXT: blr
+;
+; PWR8-BE-LABEL: i64:
+; PWR8-BE: # %bb.0: # %entry
+; PWR8-BE-NEXT: lxvd2x v2, 0, r3
+; PWR8-BE-NEXT: li r3, 10
+; PWR8-BE-NEXT: mtfprd f0, r3
+; PWR8-BE-NEXT: xxmrghd v2, v2, vs0
+; PWR8-BE-NEXT: blr
+;
+; PWR8-LE-LABEL: i64:
+; PWR8-LE: # %bb.0: # %entry
+; PWR8-LE-NEXT: lxvd2x vs0, 0, r3
+; PWR8-LE-NEXT: li r3, 10
+; PWR8-LE-NEXT: xxswapd v2, vs0
+; PWR8-LE-NEXT: mtfprd f0, r3
+; PWR8-LE-NEXT: xxpermdi v2, vs0, v2, 1
+; PWR8-LE-NEXT: blr
+entry:
+ %0 = load <2 x i64>, ptr %p, align 16
+ %vecinit1 = insertelement <2 x i64> %0, i64 10, i64 1
+ ret <2 x i64> %vecinit1
+}
diff --git a/llvm/test/CodeGen/PowerPC/load-and-splat.ll b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
index c9ee3a51f41724..1993b1678b3ea4 100644
--- a/llvm/test/CodeGen/PowerPC/load-and-splat.ll
+++ b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
@@ -356,11 +356,9 @@ define void @test6(ptr %a, ptr %in) {
;
; P9-AIX32-LABEL: test6:
; P9-AIX32: # %bb.0: # %entry
-; P9-AIX32-NEXT: li r5, 0
-; P9-AIX32-NEXT: stw r5, -16(r1)
; P9-AIX32-NEXT: lwz r5, L..C2(r2) # %const.0
; P9-AIX32-NEXT: lxvwsx vs1, 0, r4
-; P9-AIX32-NEXT: lxv vs2, -16(r1)
+; P9-AIX32-NEXT: xxlxor vs2, vs2, vs2
; P9-AIX32-NEXT: lxv vs0, 0(r5)
; P9-AIX32-NEXT: xxperm vs1, vs2, vs0
; P9-AIX32-NEXT: stxv vs1, 0(r3)
@@ -368,13 +366,10 @@ define void @test6(ptr %a, ptr %in) {
;
; P8-AIX32-LABEL: test6:
; P8-AIX32: # %bb.0: # %entry
-; P8-AIX32-NEXT: li r5, 0
-; P8-AIX32-NEXT: stw r5, -16(r1)
; P8-AIX32-NEXT: lfiwzx f0, 0, r4
; P8-AIX32-NEXT: lwz r4, L..C2(r2) # %const.0
+; P8-AIX32-NEXT: xxlxor v4, v4, v4
; P8-AIX32-NEXT: lxvw4x v3, 0, r4
-; P8-AIX32-NEXT: addi r4, r1, -16
-; P8-AIX32-NEXT: lxvw4x v4, 0, r4
; P8-AIX32-NEXT: xxspltw v2, vs0, 1
; P8-AIX32-NEXT: vperm v2, v4, v2, v3
; P8-AIX32-NEXT: stxvw4x v2, 0, r3
@@ -382,13 +377,10 @@ define void @test6(ptr %a, ptr %in) {
;
; P7-AIX32-LABEL: test6:
; P7-AIX32: # %bb.0: # %entry
-; P7-AIX32-NEXT: li r5, 0
-; P7-AIX32-NEXT: stw r5, -16(r1)
; P7-AIX32-NEXT: lfiwzx f0, 0, r4
; P7-AIX32-NEXT: lwz r4, L..C2(r2) # %const.0
+; P7-AIX32-NEXT: xxlxor v4, v4, v4
; P7-AIX32-NEXT: lxvw4x v3, 0, r4
-; P7-AIX32-NEXT: addi r4, r1, -16
-; P7-AIX32-NEXT: lxvw4x v4, 0, r4
; P7-AIX32-NEXT: xxspltw v2, vs0, 1
; P7-AIX32-NEXT: vperm v2, v4, v2, v3
; P7-AIX32-NEXT: stxvw4x v2, 0, r3
diff --git a/llvm/test/CodeGen/PowerPC/p10-splatImm32-undef.ll b/llvm/test/CodeGen/PowerPC/p10-splatImm32-undef.ll
index ad6a576fbf50ef..04e7110b669a9d 100644
--- a/llvm/test/CodeGen/PowerPC/p10-splatImm32-undef.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-splatImm32-undef.ll
@@ -60,15 +60,13 @@ define hidden void @function1() {
; CHECK-LINUX-32: # %bb.0: # %entry
; CHECK-LINUX-32-NEXT: mflr r0
; CHECK-LINUX-32-NEXT: stw r0, 4(r1)
-; CHECK-LINUX-32-NEXT: stwu r1, -48(r1)
-; CHECK-LINUX-32-NEXT: .cfi_def_cfa_offset 48
+; CHECK-LINUX-32-NEXT: stwu r1, -32(r1)
+; CHECK-LINUX-32-NEXT: .cfi_def_cfa_offset 32
; CHECK-LINUX-32-NEXT: .cfi_offset lr, 4
; CHECK-LINUX-32-NEXT: bl call1
-; CHECK-LINUX-32-NEXT: li r4, 0
; CHECK-LINUX-32-NEXT: stw r3, 16(r1)
-; CHECK-LINUX-32-NEXT: stw r4, 32(r1)
-; CHECK-LINUX-32-NEXT: lwz r0, 52(r1)
-; CHECK-LINUX-32-NEXT: addi r1, r1, 48
+; CHECK-LINUX-32-NEXT: lwz r0, 36(r1)
+; CHECK-LINUX-32-NEXT: addi r1, r1, 32
; CHECK-LINUX-32-NEXT: mtlr r0
; CHECK-LINUX-32-NEXT: blr
;
@@ -76,13 +74,11 @@ define hidden void @function1() {
; CHECK-AIX-32: # %bb.0: # %entry
; CHECK-AIX-32-NEXT: mflr r0
; CHECK-AIX-32-NEXT: stw r0, 8(r1)
-; CHECK-AIX-32-NEXT: stwu r1, -96(r1)
+; CHECK-AIX-32-NEXT: stwu r1, -80(r1)
; CHECK-AIX-32-NEXT: bl .call1[PR]
; CHECK-AIX-32-NEXT: nop
-; CHECK-AIX-32-NEXT: li r4, 0
; CHECK-AIX-32-NEXT: stw r3, 64(r1)
-; CHECK-AIX-32-NEXT: stw r4, 80(r1)
-; CHECK-AIX-32-NEXT: addi r1, r1, 96
+; CHECK-AIX-32-NEXT: addi r1, r1, 80
; CHECK-AIX-32-NEXT: lwz r0, 8(r1)
; CHECK-AIX-32-NEXT: mtlr r0
; CHECK-AIX-32-NEXT: blr
diff --git a/llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll b/llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll
index 0171e27e80901d..35b478017383fa 100644
--- a/llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll
@@ -8,30 +8,27 @@
define dso_local fastcc void @BuildVectorICE() unnamed_addr {
; 32BIT-LABEL: BuildVectorICE:
; 32BIT: # %bb.0: # %entry
-; 32BIT-NEXT: stwu 1, -64(1)
-; 32BIT-NEXT: .cfi_def_cfa_offset 64
-; 32BIT-NEXT: li 4, .LCPI0_0@l
-; 32BIT-NEXT: lis 5, .LCPI0_0@ha
+; 32BIT-NEXT: stwu 1, -48(1)
+; 32BIT-NEXT: .cfi_def_cfa_offset 48
; 32BIT-NEXT: lxvw4x 34, 0, 3
-; 32BIT-NEXT: li 3, 0
-; 32BIT-NEXT: addi 6, 1, 48
-; 32BIT-NEXT: li 7, 0
-; 32BIT-NEXT: lxvw4x 35, 5, 4
+; 32BIT-NEXT: li 3, .LCPI0_0@l
+; 32BIT-NEXT: lis 4, .LCPI0_0@ha
+; 32BIT-NEXT: li 5, 0
+; 32BIT-NEXT: xxlxor 36, 36, 36
+; 32BIT-NEXT: lxvw4x 35, 4, 3
+; 32BIT-NEXT: addi 3, 1, 16
; 32BIT-NEXT: addi 4, 1, 32
-; 32BIT-NEXT: addi 5, 1, 16
; 32BIT-NEXT: .p2align 4
; 32BIT-NEXT: .LBB0_1: # %while.body
; 32BIT-NEXT: #
-; 32BIT-NEXT: stw 3, 32(1)
-; 32BIT-NEXT: stw 7, 16(1)
-; 32BIT-NEXT: lxvw4x 36, 0, 4
-; 32BIT-NEXT: lxvw4x 37, 0, 5
-; 32BIT-NEXT: vperm 4, 5, 4, 3
-; 32BIT-NEXT: vadduwm 4, 2, 4
-; 32BIT-NEXT: xxspltw 37, 36, 1
-; 32BIT-NEXT: vadduwm 4, 4, 5
-; 32BIT-NEXT: stxvw4x 36, 0, 6
-; 32BIT-NEXT: lwz 7, 48(1)
+; 32BIT-NEXT: stw 5, 16(1)
+; 32BIT-NEXT: lxvw4x 37, 0, 3
+; 32BIT-NEXT: vperm 5, 5, 4, 3
+; 32BIT-NEXT: vadduwm 5, 2, 5
+; 32BIT-NEXT: xxspltw 32, 37, 1
+; 32BIT-NEXT: vadduwm 5, 5, 0
+; 32BIT-NEXT: stxvw4x 37, 0, 4
+; 32BIT-NEXT: lwz 5, 32(1)
; 32BIT-NEXT: b .LBB0_1
;
; 64BIT-LABEL: BuildVectorICE:
More information about the llvm-commits mailing list