[llvm] fc59f2c - [PowerPC] special case small int constant for custom scalar_to_vector (#109850)

via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 21 09:19:11 PDT 2024


Author: RolandF77
Date: 2024-10-21T12:19:07-04:00
New Revision: fc59f2cc0f191bb7a0706dfb65e3e46fef69f466

URL: https://github.com/llvm/llvm-project/commit/fc59f2cc0f191bb7a0706dfb65e3e46fef69f466
DIFF: https://github.com/llvm/llvm-project/commit/fc59f2cc0f191bb7a0706dfb65e3e46fef69f466.diff

LOG: [PowerPC] special case small int constant for custom scalar_to_vector (#109850)

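Special-case small integer constants in the PPC custom lowering of
scalar_to_vector: when the operand is a constant in the range [-16, 15]
and the vector element size is at most 32 bits, emit a canonical constant
splat instead of storing the scalar to the stack and reloading it.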

Added: 
    llvm/test/CodeGen/PowerPC/const-stov.ll

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
    llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
    llvm/test/CodeGen/PowerPC/load-and-splat.ll
    llvm/test/CodeGen/PowerPC/p10-splatImm32-undef.ll
    llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index cb0c8bade67012..7199fac9b110b6 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11595,6 +11595,15 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
 
   MachineFunction &MF = DAG.getMachineFunction();
   SDValue Op0 = Op.getOperand(0);
+  EVT ValVT = Op0.getValueType();
+  unsigned EltSize = Op.getValueType().getScalarSizeInBits();
+  if (isa<ConstantSDNode>(Op0) && EltSize <= 32) {
+    int64_t IntVal = Op.getConstantOperandVal(0);
+    if (IntVal >= -16 && IntVal <= 15)
+      return getCanonicalConstSplat(IntVal, EltSize / 8, Op.getValueType(), DAG,
+                                    dl);
+  }
+
   ReuseLoadInfo RLI;
   if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
       Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
@@ -11619,7 +11628,6 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
 
   SDValue Val = Op0;
-  EVT ValVT = Val.getValueType();
   // P10 hardware store forwarding requires that a single store contains all
   // the data for the load. P10 is able to merge a pair of adjacent stores. Try
   // to avoid load hit store on P10 when running binaries compiled for older

diff  --git a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
index fba6725e2b2a3f..2259b6e0f44df6 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
@@ -26,18 +26,14 @@ define  <2 x i64> @build_v2i64_extload_0(ptr nocapture noundef readonly %p) {
 ;
 ; PWR7-LE-LABEL: build_v2i64_extload_0:
 ; PWR7-LE:       # %bb.0: # %entry
-; PWR7-LE-NEXT:    li 4, 0
-; PWR7-LE-NEXT:    stw 4, -16(1)
-; PWR7-LE-NEXT:    addis 4, 2, .LCPI0_0@toc@ha
 ; PWR7-LE-NEXT:    lfiwzx 0, 0, 3
-; PWR7-LE-NEXT:    addi 3, 1, -16
-; PWR7-LE-NEXT:    addi 4, 4, .LCPI0_0@toc@l
-; PWR7-LE-NEXT:    lxvd2x 1, 0, 4
-; PWR7-LE-NEXT:    xxspltw 35, 0, 1
+; PWR7-LE-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
+; PWR7-LE-NEXT:    xxlxor 36, 36, 36
+; PWR7-LE-NEXT:    addi 3, 3, .LCPI0_0@toc@l
+; PWR7-LE-NEXT:    xxspltw 34, 0, 1
 ; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
-; PWR7-LE-NEXT:    xxswapd 34, 1
-; PWR7-LE-NEXT:    xxswapd 36, 0
-; PWR7-LE-NEXT:    vperm 2, 4, 3, 2
+; PWR7-LE-NEXT:    xxswapd 35, 0
+; PWR7-LE-NEXT:    vperm 2, 4, 2, 3
 ; PWR7-LE-NEXT:    blr
 ;
 ; PWR8-LE-LABEL: build_v2i64_extload_0:
@@ -357,18 +353,14 @@ define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) {
 ;
 ; PWR7-LE-LABEL: build_v4i32_load_0:
 ; PWR7-LE:       # %bb.0: # %entry
-; PWR7-LE-NEXT:    li 4, 0
-; PWR7-LE-NEXT:    stw 4, -16(1)
-; PWR7-LE-NEXT:    addis 4, 2, .LCPI8_0@toc@ha
 ; PWR7-LE-NEXT:    lfiwzx 0, 0, 3
-; PWR7-LE-NEXT:    addi 3, 1, -16
-; PWR7-LE-NEXT:    addi 4, 4, .LCPI8_0@toc@l
-; PWR7-LE-NEXT:    lxvd2x 1, 0, 4
-; PWR7-LE-NEXT:    xxspltw 35, 0, 1
+; PWR7-LE-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
+; PWR7-LE-NEXT:    xxlxor 36, 36, 36
+; PWR7-LE-NEXT:    addi 3, 3, .LCPI8_0@toc@l
+; PWR7-LE-NEXT:    xxspltw 34, 0, 1
 ; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
-; PWR7-LE-NEXT:    xxswapd 34, 1
-; PWR7-LE-NEXT:    xxswapd 36, 0
-; PWR7-LE-NEXT:    vperm 2, 4, 3, 2
+; PWR7-LE-NEXT:    xxswapd 35, 0
+; PWR7-LE-NEXT:    vperm 2, 4, 2, 3
 ; PWR7-LE-NEXT:    blr
 ;
 ; PWR8-LE-LABEL: build_v4i32_load_0:
@@ -412,18 +404,14 @@ define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) {
 ;
 ; PWR7-LE-LABEL: build_v4i32_load_1:
 ; PWR7-LE:       # %bb.0: # %entry
-; PWR7-LE-NEXT:    li 4, 0
-; PWR7-LE-NEXT:    stw 4, -16(1)
-; PWR7-LE-NEXT:    addis 4, 2, .LCPI9_0@toc@ha
 ; PWR7-LE-NEXT:    lfiwzx 0, 0, 3
-; PWR7-LE-NEXT:    addi 3, 1, -16
-; PWR7-LE-NEXT:    addi 4, 4, .LCPI9_0@toc@l
-; PWR7-LE-NEXT:    lxvd2x 1, 0, 4
-; PWR7-LE-NEXT:    xxspltw 35, 0, 1
+; PWR7-LE-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
+; PWR7-LE-NEXT:    xxlxor 36, 36, 36
+; PWR7-LE-NEXT:    addi 3, 3, .LCPI9_0@toc@l
+; PWR7-LE-NEXT:    xxspltw 34, 0, 1
 ; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
-; PWR7-LE-NEXT:    xxswapd 34, 1
-; PWR7-LE-NEXT:    xxswapd 36, 0
-; PWR7-LE-NEXT:    vperm 2, 3, 4, 2
+; PWR7-LE-NEXT:    xxswapd 35, 0
+; PWR7-LE-NEXT:    vperm 2, 2, 4, 3
 ; PWR7-LE-NEXT:    blr
 ;
 ; PWR8-LE-LABEL: build_v4i32_load_1:
@@ -469,18 +457,14 @@ define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) {
 ;
 ; PWR7-LE-LABEL: build_v4i32_load_2:
 ; PWR7-LE:       # %bb.0: # %entry
-; PWR7-LE-NEXT:    li 4, 0
-; PWR7-LE-NEXT:    stw 4, -16(1)
-; PWR7-LE-NEXT:    addis 4, 2, .LCPI10_0@toc@ha
 ; PWR7-LE-NEXT:    lfiwzx 0, 0, 3
-; PWR7-LE-NEXT:    addi 3, 1, -16
-; PWR7-LE-NEXT:    addi 4, 4, .LCPI10_0@toc@l
-; PWR7-LE-NEXT:    lxvd2x 1, 0, 4
-; PWR7-LE-NEXT:    xxspltw 35, 0, 1
+; PWR7-LE-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
+; PWR7-LE-NEXT:    xxlxor 36, 36, 36
+; PWR7-LE-NEXT:    addi 3, 3, .LCPI10_0@toc@l
+; PWR7-LE-NEXT:    xxspltw 34, 0, 1
 ; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
-; PWR7-LE-NEXT:    xxswapd 34, 1
-; PWR7-LE-NEXT:    xxswapd 36, 0
-; PWR7-LE-NEXT:    vperm 2, 3, 4, 2
+; PWR7-LE-NEXT:    xxswapd 35, 0
+; PWR7-LE-NEXT:    vperm 2, 2, 4, 3
 ; PWR7-LE-NEXT:    blr
 ;
 ; PWR8-LE-LABEL: build_v4i32_load_2:
@@ -524,18 +508,14 @@ define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) {
 ;
 ; PWR7-LE-LABEL: build_v4i32_load_3:
 ; PWR7-LE:       # %bb.0: # %entry
-; PWR7-LE-NEXT:    li 4, 0
-; PWR7-LE-NEXT:    stw 4, -16(1)
-; PWR7-LE-NEXT:    addis 4, 2, .LCPI11_0@toc@ha
 ; PWR7-LE-NEXT:    lfiwzx 0, 0, 3
-; PWR7-LE-NEXT:    addi 3, 1, -16
-; PWR7-LE-NEXT:    addi 4, 4, .LCPI11_0@toc@l
-; PWR7-LE-NEXT:    lxvd2x 1, 0, 4
-; PWR7-LE-NEXT:    xxspltw 35, 0, 1
+; PWR7-LE-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
+; PWR7-LE-NEXT:    xxlxor 36, 36, 36
+; PWR7-LE-NEXT:    addi 3, 3, .LCPI11_0@toc@l
+; PWR7-LE-NEXT:    xxspltw 34, 0, 1
 ; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
-; PWR7-LE-NEXT:    xxswapd 34, 1
-; PWR7-LE-NEXT:    xxswapd 36, 0
-; PWR7-LE-NEXT:    vperm 2, 3, 4, 2
+; PWR7-LE-NEXT:    xxswapd 35, 0
+; PWR7-LE-NEXT:    vperm 2, 2, 4, 3
 ; PWR7-LE-NEXT:    blr
 ;
 ; PWR8-LE-LABEL: build_v4i32_load_3:

diff  --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
index e1159e56e23ebe..7f6fdc7f88cd11 100644
--- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
+++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
@@ -849,16 +849,12 @@ define dso_local <16 x i8> @no_RAUW_in_combine_during_legalize(ptr nocapture rea
 ;
 ; P8-AIX-32-LABEL: no_RAUW_in_combine_during_legalize:
 ; P8-AIX-32:       # %bb.0: # %entry
-; P8-AIX-32-NEXT:    li r5, 0
 ; P8-AIX-32-NEXT:    slwi r4, r4, 2
-; P8-AIX-32-NEXT:    xxlxor v3, v3, v3
-; P8-AIX-32-NEXT:    stw r5, -16(r1)
+; P8-AIX-32-NEXT:    xxlxor v2, v2, v2
 ; P8-AIX-32-NEXT:    lfiwzx f0, r3, r4
-; P8-AIX-32-NEXT:    addi r3, r1, -16
-; P8-AIX-32-NEXT:    lxvw4x vs1, 0, r3
 ; P8-AIX-32-NEXT:    xxspltw vs0, vs0, 1
-; P8-AIX-32-NEXT:    xxmrghw v2, vs1, vs0
-; P8-AIX-32-NEXT:    vmrghb v2, v2, v3
+; P8-AIX-32-NEXT:    xxmrghw v3, v2, vs0
+; P8-AIX-32-NEXT:    vmrghb v2, v3, v2
 ; P8-AIX-32-NEXT:    blr
 entry:
   %idx.ext = sext i32 %offset to i64

diff  --git a/llvm/test/CodeGen/PowerPC/const-stov.ll b/llvm/test/CodeGen/PowerPC/const-stov.ll
new file mode 100644
index 00000000000000..69c68a4f27371e
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/const-stov.ll
@@ -0,0 +1,164 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs \
+; RUN:     -mtriple=powerpc64-- -mcpu=pwr7 < %s | FileCheck \
+; RUN:     --check-prefix=PWR7-BE %s
+; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs \
+; RUN:     -mtriple=powerpc64-- -mcpu=pwr8 < %s | FileCheck \
+; RUN:     --check-prefix=PWR8-BE %s
+; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs \
+; RUN:     -mtriple=powerpc64le-- -mcpu=pwr8 < %s | FileCheck \
+; RUN:     --check-prefix=PWR8-LE %s
+
+define  <16 x i8> @i8(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: i8:
+; PWR7-BE:       # %bb.0: # %entry
+; PWR7-BE-NEXT:    lxvw4x v3, 0, r3
+; PWR7-BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
+; PWR7-BE-NEXT:    vspltisb v2, 10
+; PWR7-BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
+; PWR7-BE-NEXT:    lxvw4x v4, 0, r3
+; PWR7-BE-NEXT:    vperm v2, v3, v2, v4
+; PWR7-BE-NEXT:    blr
+;
+; PWR8-BE-LABEL: i8:
+; PWR8-BE:       # %bb.0: # %entry
+; PWR8-BE-NEXT:    lxvw4x v2, 0, r3
+; PWR8-BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
+; PWR8-BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
+; PWR8-BE-NEXT:    lxvw4x v3, 0, r3
+; PWR8-BE-NEXT:    li r3, 10
+; PWR8-BE-NEXT:    mtvsrwz v4, r3
+; PWR8-BE-NEXT:    vperm v2, v2, v4, v3
+; PWR8-BE-NEXT:    blr
+;
+; PWR8-LE-LABEL: i8:
+; PWR8-LE:       # %bb.0: # %entry
+; PWR8-LE-NEXT:    lxvd2x vs0, 0, r3
+; PWR8-LE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
+; PWR8-LE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
+; PWR8-LE-NEXT:    xxswapd v2, vs0
+; PWR8-LE-NEXT:    lxvd2x vs0, 0, r3
+; PWR8-LE-NEXT:    li r3, 10
+; PWR8-LE-NEXT:    mtvsrd v4, r3
+; PWR8-LE-NEXT:    xxswapd v3, vs0
+; PWR8-LE-NEXT:    vperm v2, v4, v2, v3
+; PWR8-LE-NEXT:    blr
+entry:
+  %0 = load <16 x i8>, ptr %p, align 16
+  %vecinit1 = insertelement <16 x i8> %0, i8 10, i64 1
+  ret <16 x i8> %vecinit1
+}
+
+define  <8 x i16> @i16(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: i16:
+; PWR7-BE:       # %bb.0: # %entry
+; PWR7-BE-NEXT:    lxvw4x v3, 0, r3
+; PWR7-BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
+; PWR7-BE-NEXT:    vspltish v2, 9
+; PWR7-BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
+; PWR7-BE-NEXT:    lxvw4x v4, 0, r3
+; PWR7-BE-NEXT:    vperm v2, v3, v2, v4
+; PWR7-BE-NEXT:    blr
+;
+; PWR8-BE-LABEL: i16:
+; PWR8-BE:       # %bb.0: # %entry
+; PWR8-BE-NEXT:    lxvw4x v2, 0, r3
+; PWR8-BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
+; PWR8-BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
+; PWR8-BE-NEXT:    lxvw4x v3, 0, r3
+; PWR8-BE-NEXT:    li r3, 9
+; PWR8-BE-NEXT:    mtvsrwz v4, r3
+; PWR8-BE-NEXT:    vperm v2, v2, v4, v3
+; PWR8-BE-NEXT:    blr
+;
+; PWR8-LE-LABEL: i16:
+; PWR8-LE:       # %bb.0: # %entry
+; PWR8-LE-NEXT:    lxvd2x vs0, 0, r3
+; PWR8-LE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
+; PWR8-LE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
+; PWR8-LE-NEXT:    xxswapd v2, vs0
+; PWR8-LE-NEXT:    lxvd2x vs0, 0, r3
+; PWR8-LE-NEXT:    li r3, 9
+; PWR8-LE-NEXT:    mtvsrd v4, r3
+; PWR8-LE-NEXT:    xxswapd v3, vs0
+; PWR8-LE-NEXT:    vperm v2, v4, v2, v3
+; PWR8-LE-NEXT:    blr
+entry:
+  %0 = load <8 x i16>, ptr %p, align 16
+  %vecinit1 = insertelement <8 x i16> %0, i16 9, i64 1
+  ret <8 x i16> %vecinit1
+}
+
+define  <4 x i32> @i32(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: i32:
+; PWR7-BE:       # %bb.0: # %entry
+; PWR7-BE-NEXT:    lxvw4x v3, 0, r3
+; PWR7-BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
+; PWR7-BE-NEXT:    vspltisw v2, 7
+; PWR7-BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
+; PWR7-BE-NEXT:    lxvw4x v4, 0, r3
+; PWR7-BE-NEXT:    vperm v2, v3, v2, v4
+; PWR7-BE-NEXT:    blr
+;
+; PWR8-BE-LABEL: i32:
+; PWR8-BE:       # %bb.0: # %entry
+; PWR8-BE-NEXT:    lxvw4x v2, 0, r3
+; PWR8-BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
+; PWR8-BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
+; PWR8-BE-NEXT:    lxvw4x v3, 0, r3
+; PWR8-BE-NEXT:    li r3, 7
+; PWR8-BE-NEXT:    mtvsrwz v4, r3
+; PWR8-BE-NEXT:    vperm v2, v2, v4, v3
+; PWR8-BE-NEXT:    blr
+;
+; PWR8-LE-LABEL: i32:
+; PWR8-LE:       # %bb.0: # %entry
+; PWR8-LE-NEXT:    lxvd2x vs0, 0, r3
+; PWR8-LE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
+; PWR8-LE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
+; PWR8-LE-NEXT:    xxswapd v2, vs0
+; PWR8-LE-NEXT:    lxvd2x vs0, 0, r3
+; PWR8-LE-NEXT:    li r3, 7
+; PWR8-LE-NEXT:    mtvsrwz v4, r3
+; PWR8-LE-NEXT:    xxswapd v3, vs0
+; PWR8-LE-NEXT:    vperm v2, v4, v2, v3
+; PWR8-LE-NEXT:    blr
+entry:
+  %0 = load <4 x i32>, ptr %p, align 16
+  %vecinit1 = insertelement <4 x i32> %0, i32 7, i64 1
+  ret <4 x i32> %vecinit1
+}
+
+define  <2 x i64> @i64(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: i64:
+; PWR7-BE:       # %bb.0: # %entry
+; PWR7-BE-NEXT:    lxvd2x v2, 0, r3
+; PWR7-BE-NEXT:    li r3, 10
+; PWR7-BE-NEXT:    std r3, -16(r1)
+; PWR7-BE-NEXT:    std r3, -8(r1)
+; PWR7-BE-NEXT:    addi r3, r1, -16
+; PWR7-BE-NEXT:    lxvd2x v3, 0, r3
+; PWR7-BE-NEXT:    xxmrghd v2, v2, v3
+; PWR7-BE-NEXT:    blr
+;
+; PWR8-BE-LABEL: i64:
+; PWR8-BE:       # %bb.0: # %entry
+; PWR8-BE-NEXT:    lxvd2x v2, 0, r3
+; PWR8-BE-NEXT:    li r3, 10
+; PWR8-BE-NEXT:    mtfprd f0, r3
+; PWR8-BE-NEXT:    xxmrghd v2, v2, vs0
+; PWR8-BE-NEXT:    blr
+;
+; PWR8-LE-LABEL: i64:
+; PWR8-LE:       # %bb.0: # %entry
+; PWR8-LE-NEXT:    lxvd2x vs0, 0, r3
+; PWR8-LE-NEXT:    li r3, 10
+; PWR8-LE-NEXT:    xxswapd v2, vs0
+; PWR8-LE-NEXT:    mtfprd f0, r3
+; PWR8-LE-NEXT:    xxpermdi v2, vs0, v2, 1
+; PWR8-LE-NEXT:    blr
+entry:
+  %0 = load <2 x i64>, ptr %p, align 16
+  %vecinit1 = insertelement <2 x i64> %0, i64 10, i64 1
+  ret <2 x i64> %vecinit1
+}

diff  --git a/llvm/test/CodeGen/PowerPC/load-and-splat.ll b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
index c9ee3a51f41724..1993b1678b3ea4 100644
--- a/llvm/test/CodeGen/PowerPC/load-and-splat.ll
+++ b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
@@ -356,11 +356,9 @@ define void @test6(ptr %a, ptr %in) {
 ;
 ; P9-AIX32-LABEL: test6:
 ; P9-AIX32:       # %bb.0: # %entry
-; P9-AIX32-NEXT:    li r5, 0
-; P9-AIX32-NEXT:    stw r5, -16(r1)
 ; P9-AIX32-NEXT:    lwz r5, L..C2(r2) # %const.0
 ; P9-AIX32-NEXT:    lxvwsx vs1, 0, r4
-; P9-AIX32-NEXT:    lxv vs2, -16(r1)
+; P9-AIX32-NEXT:    xxlxor vs2, vs2, vs2
 ; P9-AIX32-NEXT:    lxv vs0, 0(r5)
 ; P9-AIX32-NEXT:    xxperm vs1, vs2, vs0
 ; P9-AIX32-NEXT:    stxv vs1, 0(r3)
@@ -368,13 +366,10 @@ define void @test6(ptr %a, ptr %in) {
 ;
 ; P8-AIX32-LABEL: test6:
 ; P8-AIX32:       # %bb.0: # %entry
-; P8-AIX32-NEXT:    li r5, 0
-; P8-AIX32-NEXT:    stw r5, -16(r1)
 ; P8-AIX32-NEXT:    lfiwzx f0, 0, r4
 ; P8-AIX32-NEXT:    lwz r4, L..C2(r2) # %const.0
+; P8-AIX32-NEXT:    xxlxor v4, v4, v4
 ; P8-AIX32-NEXT:    lxvw4x v3, 0, r4
-; P8-AIX32-NEXT:    addi r4, r1, -16
-; P8-AIX32-NEXT:    lxvw4x v4, 0, r4
 ; P8-AIX32-NEXT:    xxspltw v2, vs0, 1
 ; P8-AIX32-NEXT:    vperm v2, v4, v2, v3
 ; P8-AIX32-NEXT:    stxvw4x v2, 0, r3
@@ -382,13 +377,10 @@ define void @test6(ptr %a, ptr %in) {
 ;
 ; P7-AIX32-LABEL: test6:
 ; P7-AIX32:       # %bb.0: # %entry
-; P7-AIX32-NEXT:    li r5, 0
-; P7-AIX32-NEXT:    stw r5, -16(r1)
 ; P7-AIX32-NEXT:    lfiwzx f0, 0, r4
 ; P7-AIX32-NEXT:    lwz r4, L..C2(r2) # %const.0
+; P7-AIX32-NEXT:    xxlxor v4, v4, v4
 ; P7-AIX32-NEXT:    lxvw4x v3, 0, r4
-; P7-AIX32-NEXT:    addi r4, r1, -16
-; P7-AIX32-NEXT:    lxvw4x v4, 0, r4
 ; P7-AIX32-NEXT:    xxspltw v2, vs0, 1
 ; P7-AIX32-NEXT:    vperm v2, v4, v2, v3
 ; P7-AIX32-NEXT:    stxvw4x v2, 0, r3

diff  --git a/llvm/test/CodeGen/PowerPC/p10-splatImm32-undef.ll b/llvm/test/CodeGen/PowerPC/p10-splatImm32-undef.ll
index ad6a576fbf50ef..04e7110b669a9d 100644
--- a/llvm/test/CodeGen/PowerPC/p10-splatImm32-undef.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-splatImm32-undef.ll
@@ -60,15 +60,13 @@ define hidden void @function1() {
 ; CHECK-LINUX-32:       # %bb.0: # %entry
 ; CHECK-LINUX-32-NEXT:    mflr r0
 ; CHECK-LINUX-32-NEXT:    stw r0, 4(r1)
-; CHECK-LINUX-32-NEXT:    stwu r1, -48(r1)
-; CHECK-LINUX-32-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-LINUX-32-NEXT:    stwu r1, -32(r1)
+; CHECK-LINUX-32-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-LINUX-32-NEXT:    .cfi_offset lr, 4
 ; CHECK-LINUX-32-NEXT:    bl call1
-; CHECK-LINUX-32-NEXT:    li r4, 0
 ; CHECK-LINUX-32-NEXT:    stw r3, 16(r1)
-; CHECK-LINUX-32-NEXT:    stw r4, 32(r1)
-; CHECK-LINUX-32-NEXT:    lwz r0, 52(r1)
-; CHECK-LINUX-32-NEXT:    addi r1, r1, 48
+; CHECK-LINUX-32-NEXT:    lwz r0, 36(r1)
+; CHECK-LINUX-32-NEXT:    addi r1, r1, 32
 ; CHECK-LINUX-32-NEXT:    mtlr r0
 ; CHECK-LINUX-32-NEXT:    blr
 ;
@@ -76,13 +74,11 @@ define hidden void @function1() {
 ; CHECK-AIX-32:       # %bb.0: # %entry
 ; CHECK-AIX-32-NEXT:    mflr r0
 ; CHECK-AIX-32-NEXT:    stw r0, 8(r1)
-; CHECK-AIX-32-NEXT:    stwu r1, -96(r1)
+; CHECK-AIX-32-NEXT:    stwu r1, -80(r1)
 ; CHECK-AIX-32-NEXT:    bl .call1[PR]
 ; CHECK-AIX-32-NEXT:    nop
-; CHECK-AIX-32-NEXT:    li r4, 0
 ; CHECK-AIX-32-NEXT:    stw r3, 64(r1)
-; CHECK-AIX-32-NEXT:    stw r4, 80(r1)
-; CHECK-AIX-32-NEXT:    addi r1, r1, 96
+; CHECK-AIX-32-NEXT:    addi r1, r1, 80
 ; CHECK-AIX-32-NEXT:    lwz r0, 8(r1)
 ; CHECK-AIX-32-NEXT:    mtlr r0
 ; CHECK-AIX-32-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll b/llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll
index 0171e27e80901d..35b478017383fa 100644
--- a/llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll
@@ -8,30 +8,27 @@
 define dso_local fastcc void @BuildVectorICE() unnamed_addr {
 ; 32BIT-LABEL: BuildVectorICE:
 ; 32BIT:       # %bb.0: # %entry
-; 32BIT-NEXT:    stwu 1, -64(1)
-; 32BIT-NEXT:    .cfi_def_cfa_offset 64
-; 32BIT-NEXT:    li 4, .LCPI0_0@l
-; 32BIT-NEXT:    lis 5, .LCPI0_0@ha
+; 32BIT-NEXT:    stwu 1, -48(1)
+; 32BIT-NEXT:    .cfi_def_cfa_offset 48
 ; 32BIT-NEXT:    lxvw4x 34, 0, 3
-; 32BIT-NEXT:    li 3, 0
-; 32BIT-NEXT:    addi 6, 1, 48
-; 32BIT-NEXT:    li 7, 0
-; 32BIT-NEXT:    lxvw4x 35, 5, 4
+; 32BIT-NEXT:    li 3, .LCPI0_0@l
+; 32BIT-NEXT:    lis 4, .LCPI0_0@ha
+; 32BIT-NEXT:    li 5, 0
+; 32BIT-NEXT:    xxlxor 36, 36, 36
+; 32BIT-NEXT:    lxvw4x 35, 4, 3
+; 32BIT-NEXT:    addi 3, 1, 16
 ; 32BIT-NEXT:    addi 4, 1, 32
-; 32BIT-NEXT:    addi 5, 1, 16
 ; 32BIT-NEXT:    .p2align 4
 ; 32BIT-NEXT:  .LBB0_1: # %while.body
 ; 32BIT-NEXT:    #
-; 32BIT-NEXT:    stw 3, 32(1)
-; 32BIT-NEXT:    stw 7, 16(1)
-; 32BIT-NEXT:    lxvw4x 36, 0, 4
-; 32BIT-NEXT:    lxvw4x 37, 0, 5
-; 32BIT-NEXT:    vperm 4, 5, 4, 3
-; 32BIT-NEXT:    vadduwm 4, 2, 4
-; 32BIT-NEXT:    xxspltw 37, 36, 1
-; 32BIT-NEXT:    vadduwm 4, 4, 5
-; 32BIT-NEXT:    stxvw4x 36, 0, 6
-; 32BIT-NEXT:    lwz 7, 48(1)
+; 32BIT-NEXT:    stw 5, 16(1)
+; 32BIT-NEXT:    lxvw4x 37, 0, 3
+; 32BIT-NEXT:    vperm 5, 5, 4, 3
+; 32BIT-NEXT:    vadduwm 5, 2, 5
+; 32BIT-NEXT:    xxspltw 32, 37, 1
+; 32BIT-NEXT:    vadduwm 5, 5, 0
+; 32BIT-NEXT:    stxvw4x 37, 0, 4
+; 32BIT-NEXT:    lwz 5, 32(1)
 ; 32BIT-NEXT:    b .LBB0_1
 ;
 ; 64BIT-LABEL: BuildVectorICE:


        


More information about the llvm-commits mailing list