[llvm] 719b563 - [PowerPC][Power10] Exploit splat instruction xxsplti32dx in Power10

Wed Jan 20 09:56:06 PST 2021

Author: Albion Fung
Date: 2021-01-20T12:55:52-05:00
New Revision: 719b563ecf6851136e4c1e6a5ff6c407522dd024

URL: https://github.com/llvm/llvm-project/commit/719b563ecf6851136e4c1e6a5ff6c407522dd024
DIFF: https://github.com/llvm/llvm-project/commit/719b563ecf6851136e4c1e6a5ff6c407522dd024.diff

LOG: [PowerPC][Power10] Exploit splat instruction xxsplti32dx in Power10

Exploits the instruction xxsplti32dx.

It can be used to materialize any 64 bit scalar/vector splat by using two instances, one for the upper 32 bits and the other for the lower 32 bits. It should not materialize the cases which can be materialized by using the instruction xxspltidp.

Differential Revision: https://reviews.llvm.org/D90173

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCInstrPrefix.td
    llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
    llvm/test/CodeGen/PowerPC/p10-splatImm32.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index d6dd70fb1a0c..b37ac7d72dc2 100644

--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -8604,14 +8604,41 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
 
   // If it is a splat of a double, check if we can shrink it to a 32 bit
   // non-denormal float which when converted back to double gives us the same
-  // double. This is to exploit the XXSPLTIDP instruction.
-  if (BVNIsConstantSplat && Subtarget.hasPrefixInstrs() &&
-      (SplatBitSize == 64) && (Op->getValueType(0) == MVT::v2f64) &&
-      convertToNonDenormSingle(APSplatBits)) {
-    SDValue SplatNode = DAG.getNode(
-        PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
-        DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
-    return DAG.getBitcast(Op.getValueType(), SplatNode);
+  // double. This is to exploit the XXSPLTIDP instruction. If we lose precision, we use XXSPLTI32DX.
+  if (BVNIsConstantSplat && (SplatBitSize == 64) &&
+      Subtarget.hasPrefixInstrs()) {
+    if (convertToNonDenormSingle(APSplatBits) &&
+        (Op->getValueType(0) == MVT::v2f64)) {
+      SDValue SplatNode = DAG.getNode(
+          PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
+          DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
+      return DAG.getBitcast(Op.getValueType(), SplatNode);
+    } else { // We may lose precision, so we have to use XXSPLTI32DX.
+
+      uint32_t Hi =
+          (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
+      uint32_t Lo =
+          (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
+      SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
+
+      if (!Hi || !Lo)
+        // If either load is 0, then we should generate XXLXOR to set to 0.
+        SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
+
+      if (Hi)
+        SplatNode = DAG.getNode(
+            PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
+            DAG.getTargetConstant(0, dl, MVT::i32),
+            DAG.getTargetConstant(Hi, dl, MVT::i32));
+
+      if (Lo)
+        SplatNode =
+            DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
+                        DAG.getTargetConstant(1, dl, MVT::i32),
+                        DAG.getTargetConstant(Lo, dl, MVT::i32));
+
+      return DAG.getBitcast(Op.getValueType(), SplatNode);
+    }
   }
 
   if (!BVNIsConstantSplat || SplatBitSize > 32) {

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index b6e9562dd0f6..b9eb3b3b7d37 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -2533,6 +2533,9 @@ let Predicates = [IsISA3_1] in {
 
   def : Pat<(v1i128 (rotl v1i128:$vA, v1i128:$vB)),
             (v1i128 (VRLQ v1i128:$vA, v1i128:$vB))>;
+
+  def : Pat <(v2i64 (PPCxxsplti32dx v2i64:$XT, i32:$XI, i32:$IMM32)),
+             (v2i64 (XXSPLTI32DX v2i64:$XT, i32:$XI, i32:$IMM32))>;
 }
 
 let Predicates = [IsISA3_1, HasVSX] in {

diff  --git a/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll b/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
index 0cb8fafe7a3c..0836c4cb7bbe 100644
--- a/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
@@ -1,114 +1,216 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
-; RUN:     -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s
+; RUN:     -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s --check-prefixes=CHECK-LE
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
 ; RUN:     -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s \
-; RUN:     --check-prefix=CHECK-NOPCREL
+; RUN:     --check-prefixes=CHECK-NOPCREL-BE
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
 ; RUN:     -mattr=-pcrelative-memops -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \
-; RUN:     FileCheck %s --check-prefix=CHECK-NOPCREL
+; RUN:     FileCheck %s --check-prefixes=CHECK-NOPCREL-LE
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
 ; RUN:     -mattr=-prefix-instrs -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \
-; RUN:     FileCheck %s --check-prefix=CHECK-NOPCREL
+; RUN:     FileCheck %s --check-prefixes=CHECK-NOPREFIX
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
 ; RUN:     -ppc-asm-full-reg-names -target-abi=elfv2 -mcpu=pwr10 < %s | \
-; RUN:     FileCheck %s
+; RUN:     FileCheck %s --check-prefixes=CHECK-BE
 
 define dso_local <2 x double> @testDoubleToDoubleFail() local_unnamed_addr {
-; CHECK-LABEL: testDoubleToDoubleFail:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plxv vs34, .LCPI0_0@PCREL(0), 1
-; CHECK-NEXT:    blr
-;
-; CHECK-NOPCREL-LABEL: testDoubleToDoubleFail:
-; CHECK-NOPCREL:       # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
-; CHECK-NOPCREL-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; CHECK-NOPCREL-NEXT:    lxvx vs34, 0, r3
-; CHECK-NOPCREL-NEXT:    blr
-
+; CHECK-LE-LABEL: testDoubleToDoubleFail:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxsplti32dx vs34, 0, 1081435463
+; CHECK-LE-NEXT:    xxsplti32dx vs34, 1, -1374389535
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-NOPCREL-BE-LABEL: testDoubleToDoubleFail:
+; CHECK-NOPCREL-BE:       # %bb.0: # %entry
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs34, 0, 1081435463
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs34, 1, -1374389535
+; CHECK-NOPCREL-BE-NEXT:    blr
+;
+; CHECK-NOPCREL-LE-LABEL: testDoubleToDoubleFail:
+; CHECK-NOPCREL-LE:       # %bb.0: # %entry
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs34, 0, 1081435463
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs34, 1, -1374389535
+; CHECK-NOPCREL-LE-NEXT:    blr
+;
+; CHECK-NOPREFIX-LABEL: testDoubleToDoubleFail:
+; CHECK-NOPREFIX:       # %bb.0: # %entry
+; CHECK-NOPREFIX-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-NOPREFIX-NEXT:    addi r3, r3, .LCPI0_0@toc@l
+; CHECK-NOPREFIX-NEXT:    lxvx vs34, 0, r3
+; CHECK-NOPREFIX-NEXT:    blr
+;
+; CHECK-BE-LABEL: testDoubleToDoubleFail:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsplti32dx vs34, 0, 1081435463
+; CHECK-BE-NEXT:    xxsplti32dx vs34, 1, -1374389535
+; CHECK-BE-NEXT:    blr
 entry:
   ret <2 x double> <double 3.423300e+02, double 3.423300e+02>
 }
 
 define dso_local <2 x double> @testFloatDenormToDouble() local_unnamed_addr {
-; CHECK-LABEL: testFloatDenormToDouble:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plxv vs34, .LCPI1_0@PCREL(0), 1
-; CHECK-NEXT:    blr
-;
-; CHECK-NOPCREL-LABEL: testFloatDenormToDouble:
-; CHECK-NOPCREL:       # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
-; CHECK-NOPCREL-NEXT:    addi r3, r3, .LCPI1_0@toc@l
-; CHECK-NOPCREL-NEXT:    lxvx vs34, 0, r3
-; CHECK-NOPCREL-NEXT:    blr
-
+; CHECK-LE-LABEL: testFloatDenormToDouble:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxsplti32dx vs34, 0, 940259579
+; CHECK-LE-NEXT:    xxsplti32dx vs34, 1, -2147483648
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-NOPCREL-BE-LABEL: testFloatDenormToDouble:
+; CHECK-NOPCREL-BE:       # %bb.0: # %entry
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs34, 0, 940259579
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs34, 1, -2147483648
+; CHECK-NOPCREL-BE-NEXT:    blr
+;
+; CHECK-NOPCREL-LE-LABEL: testFloatDenormToDouble:
+; CHECK-NOPCREL-LE:       # %bb.0: # %entry
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs34, 0, 940259579
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs34, 1, -2147483648
+; CHECK-NOPCREL-LE-NEXT:    blr
+;
+; CHECK-NOPREFIX-LABEL: testFloatDenormToDouble:
+; CHECK-NOPREFIX:       # %bb.0: # %entry
+; CHECK-NOPREFIX-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-NOPREFIX-NEXT:    addi r3, r3, .LCPI1_0@toc@l
+; CHECK-NOPREFIX-NEXT:    lxvx vs34, 0, r3
+; CHECK-NOPREFIX-NEXT:    blr
+;
+; CHECK-BE-LABEL: testFloatDenormToDouble:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsplti32dx vs34, 0, 940259579
+; CHECK-BE-NEXT:    xxsplti32dx vs34, 1, -2147483648
+; CHECK-BE-NEXT:    blr
 entry:
   ret <2 x double> <double 0x380B38FB80000000, double 0x380B38FB80000000>
 }
 
 define dso_local <2 x double> @testDoubleToDoubleNaNFail() local_unnamed_addr {
-; CHECK-LABEL: testDoubleToDoubleNaNFail:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plxv vs34, .LCPI2_0@PCREL(0), 1
-; CHECK-NEXT:    blr
-;
-; CHECK-NOPCREL-LABEL: testDoubleToDoubleNaNFail:
-; CHECK-NOPCREL:       # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
-; CHECK-NOPCREL-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; CHECK-NOPCREL-NEXT:    lxvx vs34, 0, r3
-; CHECK-NOPCREL-NEXT:    blr
-
+; CHECK-LE-LABEL: testDoubleToDoubleNaNFail:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxsplti32dx vs34, 0, -1
+; CHECK-LE-NEXT:    xxsplti32dx vs34, 1, -16
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-NOPCREL-BE-LABEL: testDoubleToDoubleNaNFail:
+; CHECK-NOPCREL-BE:       # %bb.0: # %entry
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs34, 0, -1
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs34, 1, -16
+; CHECK-NOPCREL-BE-NEXT:    blr
+;
+; CHECK-NOPCREL-LE-LABEL: testDoubleToDoubleNaNFail:
+; CHECK-NOPCREL-LE:       # %bb.0: # %entry
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs34, 0, -1
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs34, 1, -16
+; CHECK-NOPCREL-LE-NEXT:    blr
+;
+; CHECK-NOPREFIX-LABEL: testDoubleToDoubleNaNFail:
+; CHECK-NOPREFIX:       # %bb.0: # %entry
+; CHECK-NOPREFIX-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
+; CHECK-NOPREFIX-NEXT:    addi r3, r3, .LCPI2_0@toc@l
+; CHECK-NOPREFIX-NEXT:    lxvx vs34, 0, r3
+; CHECK-NOPREFIX-NEXT:    blr
+;
+; CHECK-BE-LABEL: testDoubleToDoubleNaNFail:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsplti32dx vs34, 0, -1
+; CHECK-BE-NEXT:    xxsplti32dx vs34, 1, -16
+; CHECK-BE-NEXT:    blr
 entry:
   ret <2 x double> <double 0xFFFFFFFFFFFFFFF0, double 0xFFFFFFFFFFFFFFF0>
 }
 
 define dso_local double @testDoubleNonRepresentableScalar() local_unnamed_addr {
-; CHECK-LABEL: testDoubleNonRepresentableScalar:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plfd f1, .LCPI3_0@PCREL(0), 1
-; CHECK-NEXT:    blr
-;
-; CHECK-NOPCREL-LABEL: testDoubleNonRepresentableScalar:
-; CHECK-NOPCREL:       # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
-; CHECK-NOPCREL-NEXT:    lfd f1, .LCPI3_0@toc@l(r3)
-; CHECK-NOPCREL-NEXT:    blr
-
+; CHECK-LE-LABEL: testDoubleNonRepresentableScalar:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    plfd f1, .LCPI3_0@PCREL(0), 1
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-NOPCREL-BE-LABEL: testDoubleNonRepresentableScalar:
+; CHECK-NOPCREL-BE:       # %bb.0: # %entry
+; CHECK-NOPCREL-BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
+; CHECK-NOPCREL-BE-NEXT:    lfd f1, .LCPI3_0@toc@l(r3)
+; CHECK-NOPCREL-BE-NEXT:    blr
+;
+; CHECK-NOPCREL-LE-LABEL: testDoubleNonRepresentableScalar:
+; CHECK-NOPCREL-LE:       # %bb.0: # %entry
+; CHECK-NOPCREL-LE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
+; CHECK-NOPCREL-LE-NEXT:    lfd f1, .LCPI3_0@toc@l(r3)
+; CHECK-NOPCREL-LE-NEXT:    blr
+;
+; CHECK-NOPREFIX-LABEL: testDoubleNonRepresentableScalar:
+; CHECK-NOPREFIX:       # %bb.0: # %entry
+; CHECK-NOPREFIX-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
+; CHECK-NOPREFIX-NEXT:    lfd f1, .LCPI3_0@toc@l(r3)
+; CHECK-NOPREFIX-NEXT:    blr
+;
+; CHECK-BE-LABEL: testDoubleNonRepresentableScalar:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    plfd f1, .LCPI3_0@PCREL(0), 1
+; CHECK-BE-NEXT:    blr
 entry:
   ret double 3.423300e+02
 }
 
 define dso_local float @testFloatDenormScalar() local_unnamed_addr {
-; CHECK-LABEL: testFloatDenormScalar:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plfs f1, .LCPI4_0@PCREL(0), 1
-; CHECK-NEXT:    blr
-;
-; CHECK-NOPCREL-LABEL: testFloatDenormScalar:
-; CHECK-NOPCREL:       # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
-; CHECK-NOPCREL-NEXT:    lfs f1, .LCPI4_0@toc@l(r3)
-; CHECK-NOPCREL-NEXT:    blr
-
+; CHECK-LE-LABEL: testFloatDenormScalar:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    plfs f1, .LCPI4_0@PCREL(0), 1
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-NOPCREL-BE-LABEL: testFloatDenormScalar:
+; CHECK-NOPCREL-BE:       # %bb.0: # %entry
+; CHECK-NOPCREL-BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
+; CHECK-NOPCREL-BE-NEXT:    lfs f1, .LCPI4_0@toc@l(r3)
+; CHECK-NOPCREL-BE-NEXT:    blr
+;
+; CHECK-NOPCREL-LE-LABEL: testFloatDenormScalar:
+; CHECK-NOPCREL-LE:       # %bb.0: # %entry
+; CHECK-NOPCREL-LE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
+; CHECK-NOPCREL-LE-NEXT:    lfs f1, .LCPI4_0@toc@l(r3)
+; CHECK-NOPCREL-LE-NEXT:    blr
+;
+; CHECK-NOPREFIX-LABEL: testFloatDenormScalar:
+; CHECK-NOPREFIX:       # %bb.0: # %entry
+; CHECK-NOPREFIX-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
+; CHECK-NOPREFIX-NEXT:    lfs f1, .LCPI4_0@toc@l(r3)
+; CHECK-NOPREFIX-NEXT:    blr
+;
+; CHECK-BE-LABEL: testFloatDenormScalar:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    plfs f1, .LCPI4_0@PCREL(0), 1
+; CHECK-BE-NEXT:    blr
 entry:
   ret float 0x380B38FB80000000
 }
 
 define dso_local double @testFloatDenormToDoubleScalar() local_unnamed_addr {
-; CHECK-LABEL: testFloatDenormToDoubleScalar:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plfs f1, .LCPI5_0@PCREL(0), 1
-; CHECK-NEXT:    blr
-;
-; CHECK-NOPCREL-LABEL: testFloatDenormToDoubleScalar:
-; CHECK-NOPCREL:       # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
-; CHECK-NOPCREL-NEXT:    lfs f1, .LCPI5_0@toc@l(r3)
-; CHECK-NOPCREL-NEXT:    blr
-
+; CHECK-LE-LABEL: testFloatDenormToDoubleScalar:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    plfs f1, .LCPI5_0@PCREL(0), 1
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-NOPCREL-BE-LABEL: testFloatDenormToDoubleScalar:
+; CHECK-NOPCREL-BE:       # %bb.0: # %entry
+; CHECK-NOPCREL-BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
+; CHECK-NOPCREL-BE-NEXT:    lfs f1, .LCPI5_0@toc@l(r3)
+; CHECK-NOPCREL-BE-NEXT:    blr
+;
+; CHECK-NOPCREL-LE-LABEL: testFloatDenormToDoubleScalar:
+; CHECK-NOPCREL-LE:       # %bb.0: # %entry
+; CHECK-NOPCREL-LE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
+; CHECK-NOPCREL-LE-NEXT:    lfs f1, .LCPI5_0@toc@l(r3)
+; CHECK-NOPCREL-LE-NEXT:    blr
+;
+; CHECK-NOPREFIX-LABEL: testFloatDenormToDoubleScalar:
+; CHECK-NOPREFIX:       # %bb.0: # %entry
+; CHECK-NOPREFIX-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
+; CHECK-NOPREFIX-NEXT:    lfs f1, .LCPI5_0@toc@l(r3)
+; CHECK-NOPREFIX-NEXT:    blr
+;
+; CHECK-BE-LABEL: testFloatDenormToDoubleScalar:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    plfs f1, .LCPI5_0@PCREL(0), 1
+; CHECK-BE-NEXT:    blr
 entry:
   ret double 0x380B38FB80000000
 }

diff  --git a/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll b/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll
index d610bd260fc9..420a96dc1495 100644
--- a/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll
@@ -1,22 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
 ; RUN:     -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \
-; RUN:     FileCheck --check-prefix=CHECK-LE %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
-; RUN:     -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \
-; RUN:     FileCheck --check-prefix=CHECK-BE %s
+; RUN:     FileCheck %s
 
 ; Function Attrs: norecurse nounwind readnone
 define  <4 x i32> @test_xxsplti32dx_1(<4 x i32> %a) {
-; CHECK-LE-LABEL: test_xxsplti32dx_1:
-; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xxsplti32dx vs34, 0, 566
-; CHECK-LE-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_xxsplti32dx_1:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsplti32dx vs34, 1, 566
-; CHECK-BE-NEXT:    blr
+; CHECK-LABEL: test_xxsplti32dx_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsplti32dx vs34, 0, 566
+; CHECK-NEXT:    blr
 entry:
   %vecins1 = shufflevector <4 x i32> %a, <4 x i32> <i32 undef, i32 566, i32 undef, i32 566>, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x i32> %vecins1
@@ -24,15 +16,10 @@ entry:
 
 ; Function Attrs: norecurse nounwind readnone
 define  <4 x i32> @test_xxsplti32dx_2(<4 x i32> %a) {
-; CHECK-LE-LABEL: test_xxsplti32dx_2:
-; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xxsplti32dx vs34, 1, 33
-; CHECK-LE-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_xxsplti32dx_2:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsplti32dx vs34, 0, 33
-; CHECK-BE-NEXT:    blr
+; CHECK-LABEL: test_xxsplti32dx_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsplti32dx vs34, 1, 33
+; CHECK-NEXT:    blr
 entry:
   %vecins1 = shufflevector <4 x i32> <i32 33, i32 undef, i32 33, i32 undef>, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x i32> %vecins1
@@ -40,15 +27,10 @@ entry:
 
 ; Function Attrs: norecurse nounwind readnone
 define  <4 x i32> @test_xxsplti32dx_3(<4 x i32> %a) {
-; CHECK-LE-LABEL: test_xxsplti32dx_3:
-; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xxsplti32dx vs34, 0, 12
-; CHECK-LE-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_xxsplti32dx_3:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsplti32dx vs34, 1, 12
-; CHECK-BE-NEXT:    blr
+; CHECK-LABEL: test_xxsplti32dx_3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsplti32dx vs34, 0, 12
+; CHECK-NEXT:    blr
 entry:
   %vecins1 = shufflevector <4 x i32> %a, <4 x i32> <i32 undef, i32 12, i32 undef, i32 12>, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x i32> %vecins1
@@ -56,15 +38,10 @@ entry:
 
 ; Function Attrs: norecurse nounwind readnone
 define  <4 x i32> @test_xxsplti32dx_4(<4 x i32> %a) {
-; CHECK-LE-LABEL: test_xxsplti32dx_4:
-; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xxsplti32dx vs34, 1, -683
-; CHECK-LE-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_xxsplti32dx_4:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsplti32dx vs34, 0, -683
-; CHECK-BE-NEXT:    blr
+; CHECK-LABEL: test_xxsplti32dx_4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsplti32dx vs34, 1, -683
+; CHECK-NEXT:    blr
 entry:
   %vecins1 = shufflevector <4 x i32> <i32 -683, i32 undef, i32 -683, i32 undef>, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x i32> %vecins1
@@ -72,15 +49,10 @@ entry:
 
 ; Function Attrs: nounwind
 define  <4 x float> @test_xxsplti32dx_5(<4 x float> %vfa) {
-; CHECK-LE-LABEL: test_xxsplti32dx_5:
-; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xxsplti32dx vs34, 0, 1065353216
-; CHECK-LE-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_xxsplti32dx_5:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsplti32dx vs34, 1, 1065353216
-; CHECK-BE-NEXT:    blr
+; CHECK-LABEL: test_xxsplti32dx_5:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsplti32dx vs34, 0, 1065353216
+; CHECK-NEXT:    blr
 entry:
   %vecins3.i = shufflevector <4 x float> %vfa, <4 x float> <float undef, float 1.000000e+00, float undef, float 1.000000e+00>, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x float> %vecins3.i
@@ -88,15 +60,10 @@ entry:
 
 ; Function Attrs: nounwind
 define  <4 x float> @test_xxsplti32dx_6(<4 x float> %vfa) {
-; CHECK-LE-LABEL: test_xxsplti32dx_6:
-; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xxsplti32dx vs34, 1, 1073741824
-; CHECK-LE-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_xxsplti32dx_6:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsplti32dx vs34, 0, 1073741824
-; CHECK-BE-NEXT:    blr
+; CHECK-LABEL: test_xxsplti32dx_6:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsplti32dx vs34, 1, 1073741824
+; CHECK-NEXT:    blr
 entry:
   %vecins3.i = shufflevector <4 x float> <float 2.000000e+00, float undef, float 2.000000e+00, float undef>, <4 x float> %vfa, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x float> %vecins3.i
@@ -105,16 +72,31 @@ entry:
 ; Function Attrs: norecurse nounwind readnone
 ; Test to illustrate when the splat is narrower than 32-bits.
 define dso_local <4 x i32> @test_xxsplti32dx_7(<4 x i32> %a) local_unnamed_addr #0 {
-; CHECK-LE-LABEL: test_xxsplti32dx_7:
-; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xxsplti32dx vs34, 1, -1414812757
-; CHECK-LE-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_xxsplti32dx_7:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsplti32dx vs34, 0, -1414812757
-; CHECK-BE-NEXT:    blr
+; CHECK-LABEL: test_xxsplti32dx_7:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsplti32dx vs34, 1, -1414812757
+; CHECK-NEXT:    blr
 entry:
   %vecins1 = shufflevector <4 x i32> <i32 -1414812757, i32 undef, i32 -1414812757, i32 undef>, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x i32> %vecins1
 }
+
+define dso_local <2 x double> @test_xxsplti32dx_8() {
+; CHECK-LABEL: test_xxsplti32dx_8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsplti32dx vs34, 0, 1082660167
+; CHECK-NEXT:    xxsplti32dx vs34, 1, -1374389535
+; CHECK-NEXT:    blr
+entry:
+  ret <2 x double> <double 0x40881547AE147AE1, double 0x40881547AE147AE1>
+}
+
+define dso_local <8 x i16> @test_xxsplti32dx_9() {
+; CHECK-LABEL: test_xxsplti32dx_9:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsplti32dx vs34, 0, 23855277
+; CHECK-NEXT:    xxsplti32dx vs34, 1, 65827
+; CHECK-NEXT:    blr
+entry:
+  ret <8 x i16> <i16 291, i16 undef, i16 undef, i16 364, i16 undef, i16 1, i16 173, i16 undef>
+}