[llvm] 351a0d8 - [PowerPC] Update PC-Relative Load/Store Patterns to use the refactored Load/Store Implementation

Amy Kwan via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 9 13:40:14 PDT 2021


Author: Amy Kwan
Date: 2021-09-09T15:38:42-05:00
New Revision: 351a0d8a9053f14f5bee6d762ce5b40e08f3ceb4

URL: https://github.com/llvm/llvm-project/commit/351a0d8a9053f14f5bee6d762ce5b40e08f3ceb4
DIFF: https://github.com/llvm/llvm-project/commit/351a0d8a9053f14f5bee6d762ce5b40e08f3ceb4.diff

LOG: [PowerPC] Update PC-Relative Load/Store Patterns to use the refactored Load/Store Implementation

This patch updates the PC-Relative load and store patterns to utilize the
refactored load/store implementation introduced in D93370.

PC-Relative implementation has been added to PPCISelLowering.cpp, and also the
patterns in PPCInstrPrefix.td have been updated and no longer require AddedComplexity.
All existing test cases pass with this update.

Differential Revision: https://reviews.llvm.org/D95116

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCISelLowering.h
    llvm/lib/Target/PowerPC/PPCInstrInfo.td
    llvm/lib/Target/PowerPC/PPCInstrPrefix.td
    llvm/test/CodeGen/PowerPC/pcrel_ldst.ll
    llvm/test/CodeGen/PowerPC/reduce_scalarization.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index a62a7193f9473..1d95394ee3182 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -262,6 +262,14 @@ namespace {
                                                 None) == PPC::AM_DForm;
     }
 
+    /// SelectPCRelForm - Returns true if address N can be represented by
+    /// PC-Relative addressing mode.
+    bool SelectPCRelForm(SDNode *Parent, SDValue N, SDValue &Disp,
+                         SDValue &Base) {
+      return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
+                                                None) == PPC::AM_PCRel;
+    }
+
     /// SelectXForm - Returns true if address N can be represented by the
     /// addressing mode of XForm instructions (an indexed [r+r] operation).
     bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {

diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 8ec5ee45f0d83..c1230c6adeebc 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17357,6 +17357,14 @@ static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
   }
 }
 
+static bool isPCRelNode(SDValue N) {
+  return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
+      isValidPCRelNode<ConstantPoolSDNode>(N) ||
+      isValidPCRelNode<GlobalAddressSDNode>(N) ||
+      isValidPCRelNode<JumpTableSDNode>(N) ||
+      isValidPCRelNode<BlockAddressSDNode>(N));
+}
+
 /// computeMOFlags - Given a node N and it's Parent (a MemSDNode), compute
 /// the address flags of the load/store instruction that is to be matched.
 unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
@@ -17374,6 +17382,10 @@ unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
   if (Subtarget.hasSPE())
     FlagSet |= PPC::MOF_SubtargetSPE;
 
+  // Check if we have a PCRel node and return early.
+  if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
+    return FlagSet;
+
   // Mark this as something we don't want to handle here if it is atomic
   // or pre-increment instruction.
   if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
@@ -17518,6 +17530,14 @@ PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
   // Select an X-Form load if it is not.
   setXFormForUnalignedFI(N, Flags, Mode);
 
+  // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
+  if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
+    assert(Subtarget.isUsingPCRelativeCalls() &&
+           "Must be using PC-Relative calls when a valid PC-Relative node is "
+           "present!");
+    Mode = PPC::AM_PCRel;
+  }
+
   // Set Base and Disp accordingly depending on the address mode.
   switch (Mode) {
   case PPC::AM_DForm:
@@ -17589,6 +17609,12 @@ PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
       Base = N;
     break;
   }
+  case PPC::AM_PCRel: {
+    // When selecting PC-Relative instructions, "Base" is not utilized as
+    // we select the address as [PC+imm].
+    Disp = N;
+    break;
+  }
   case PPC::AM_None:
     break;
   default: { // By default, X-Form is always available to be selected.

diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 26df67ddcf44e..fd33eea2903ae 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -494,6 +494,11 @@ namespace llvm {
     /// Constrained floating point add in round-to-zero mode.
     STRICT_FADDRTZ,
 
+    // NOTE: The nodes below may require PC-Rel specific patterns if the
+    // address could be PC-Relative. When adding new nodes below, consider
+    // whether or not the address can be PC-Relative and add the corresponding
+    // PC-relative patterns and tests.
+
     /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
     /// byte-swapping store instruction.  It byte-swaps the low "Type" bits of
     /// the GPRC input, then stores it through Ptr.  Type can be either i16 or
@@ -713,6 +718,7 @@ namespace llvm {
       AM_DSForm,
       AM_DQForm,
       AM_XForm,
+      AM_PCRel
     };
   } // end namespace PPC
 

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 19b28b77bb959..b4a3f8e34fdec 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1152,15 +1152,13 @@ def addr   : ComplexPattern<iPTR, 1, "SelectAddr",[], []>;
 /// This is just the offset part of iaddr, used for preinc.
 def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
 
-// PC Relative Address
-def pcreladdr : ComplexPattern<iPTR, 1, "SelectAddrPCRel", [], []>;
-
 // Load and Store Instruction Selection addressing modes.
 def DForm  : ComplexPattern<iPTR, 2, "SelectDForm",    [], [SDNPWantParent]>;
 def DSForm : ComplexPattern<iPTR, 2, "SelectDSForm",   [], [SDNPWantParent]>;
 def DQForm : ComplexPattern<iPTR, 2, "SelectDQForm",   [], [SDNPWantParent]>;
 def XForm  : ComplexPattern<iPTR, 2, "SelectXForm",    [], [SDNPWantParent]>;
 def ForceXForm : ComplexPattern<iPTR, 2, "SelectForceXForm", [], [SDNPWantParent]>;
+def PCRelForm : ComplexPattern<iPTR, 2, "SelectPCRelForm", [], [SDNPWantParent]>;
 
 //===----------------------------------------------------------------------===//
 // PowerPC Instruction Predicate Definitions.

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index b183dbd4b3bbb..c5a53ea05f1a6 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1682,178 +1682,171 @@ let Predicates = [PairedVectorMemops] in {
   }
 }
 
-// TODO: We have an added complexity of 500 here. This is only a temporary
-// solution to have tablegen consider these patterns first. The way we do
-// addressing for PowerPC is complex depending on available D form, X form, or
-// aligned D form loads/stores like DS and DQ forms. The prefixed
-// instructions in this file also add additional PC Relative loads/stores
-// and D form loads/stores with 34 bit immediates. It is very difficult to force
-// instruction selection to consistently pick these first without the current
-// added complexity. Once pc-relative implementation is complete, a set of
-// follow-up patches will address this refactoring and the AddedComplexity will
-// be removed.
-let Predicates = [PCRelativeMemops], AddedComplexity = 500 in {
+let Predicates = [PCRelativeMemops] in {
   // Load i32
-  def : Pat<(i32 (zextloadi1  (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i32 (zextloadi1  (PPCmatpcreladdr PCRelForm:$ga))),
             (PLBZpc $ga, 0)>;
-  def : Pat<(i32 (extloadi1  (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i32 (extloadi1  (PPCmatpcreladdr PCRelForm:$ga))),
             (PLBZpc $ga, 0)>;
-  def : Pat<(i32 (zextloadi8  (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i32 (zextloadi8  (PPCmatpcreladdr PCRelForm:$ga))),
             (PLBZpc $ga, 0)>;
-  def : Pat<(i32 (extloadi8   (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i32 (extloadi8   (PPCmatpcreladdr PCRelForm:$ga))),
             (PLBZpc $ga, 0)>;
-  def : Pat<(i32 (sextloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i32 (sextloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
             (PLHApc $ga, 0)>;
-  def : Pat<(i32 (zextloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i32 (zextloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
             (PLHZpc $ga, 0)>;
-  def : Pat<(i32 (extloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i32 (extloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
             (PLHZpc $ga, 0)>;
-  def : Pat<(i32 (load (PPCmatpcreladdr pcreladdr:$ga))), (PLWZpc $ga, 0)>;
+  def : Pat<(i32 (load (PPCmatpcreladdr PCRelForm:$ga))), (PLWZpc $ga, 0)>;
 
   // Store i32
-  def : Pat<(truncstorei8 i32:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(truncstorei8 i32:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
             (PSTBpc $RS, $ga, 0)>;
-  def : Pat<(truncstorei16 i32:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(truncstorei16 i32:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
             (PSTHpc $RS, $ga, 0)>;
-  def : Pat<(store i32:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(store i32:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
             (PSTWpc $RS, $ga, 0)>;
 
   // Load i64
-  def : Pat<(i64 (zextloadi1  (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i64 (zextloadi1  (PPCmatpcreladdr PCRelForm:$ga))),
             (PLBZ8pc $ga, 0)>;
-  def : Pat<(i64 (extloadi1  (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i64 (extloadi1  (PPCmatpcreladdr PCRelForm:$ga))),
             (PLBZ8pc $ga, 0)>;
-  def : Pat<(i64 (zextloadi8  (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i64 (zextloadi8  (PPCmatpcreladdr PCRelForm:$ga))),
             (PLBZ8pc $ga, 0)>;
-  def : Pat<(i64 (extloadi8   (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i64 (extloadi8   (PPCmatpcreladdr PCRelForm:$ga))),
             (PLBZ8pc $ga, 0)>;
-  def : Pat<(i64 (sextloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i64 (sextloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
             (PLHA8pc $ga, 0)>;
-  def : Pat<(i64 (zextloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i64 (zextloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
             (PLHZ8pc $ga, 0)>;
-  def : Pat<(i64 (extloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i64 (extloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
             (PLHZ8pc $ga, 0)>;
-  def : Pat<(i64 (zextloadi32 (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i64 (zextloadi32 (PPCmatpcreladdr PCRelForm:$ga))),
             (PLWZ8pc $ga, 0)>;
-  def : Pat<(i64 (sextloadi32 (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i64 (sextloadi32 (PPCmatpcreladdr PCRelForm:$ga))),
             (PLWA8pc $ga, 0)>;
-  def : Pat<(i64 (extloadi32 (PPCmatpcreladdr pcreladdr:$ga))),
+  def : Pat<(i64 (extloadi32 (PPCmatpcreladdr PCRelForm:$ga))),
             (PLWZ8pc $ga, 0)>;
-  def : Pat<(i64 (load (PPCmatpcreladdr pcreladdr:$ga))), (PLDpc $ga, 0)>;
+  def : Pat<(i64 (load (PPCmatpcreladdr PCRelForm:$ga))), (PLDpc $ga, 0)>;
 
   // Store i64
-  def : Pat<(truncstorei8 i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(truncstorei8 i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
             (PSTB8pc $RS, $ga, 0)>;
-  def : Pat<(truncstorei16 i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(truncstorei16 i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
             (PSTH8pc $RS, $ga, 0)>;
-  def : Pat<(truncstorei32 i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(truncstorei32 i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
             (PSTW8pc $RS, $ga, 0)>;
-  def : Pat<(store i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(store i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
             (PSTDpc $RS, $ga, 0)>;
 
   // Load f32
-  def : Pat<(f32 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLFSpc $addr, 0)>;
+  def : Pat<(f32 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLFSpc $addr, 0)>;
 
   // Store f32
-  def : Pat<(store f32:$FRS, (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(store f32:$FRS, (PPCmatpcreladdr PCRelForm:$ga)),
             (PSTFSpc $FRS, $ga, 0)>;
 
   // Load f64
-  def : Pat<(f64 (extloadf32 (PPCmatpcreladdr pcreladdr:$addr))),
+  def : Pat<(f64 (extloadf32 (PPCmatpcreladdr PCRelForm:$addr))),
             (COPY_TO_REGCLASS (PLFSpc $addr, 0), VSFRC)>;
-  def : Pat<(f64 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLFDpc $addr, 0)>;
+  def : Pat<(f64 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLFDpc $addr, 0)>;
 
   // Store f64
-  def : Pat<(store f64:$FRS, (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(store f64:$FRS, (PPCmatpcreladdr PCRelForm:$ga)),
             (PSTFDpc $FRS, $ga, 0)>;
 
   // Load f128
-  def : Pat<(f128 (load (PPCmatpcreladdr pcreladdr:$addr))),
+  def : Pat<(f128 (load (PPCmatpcreladdr PCRelForm:$addr))),
             (COPY_TO_REGCLASS (PLXVpc $addr, 0), VRRC)>;
 
   // Store f128
-  def : Pat<(store f128:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(store f128:$XS, (PPCmatpcreladdr PCRelForm:$ga)),
             (PSTXVpc (COPY_TO_REGCLASS $XS, VSRC), $ga, 0)>;
 
   // Load v4i32
-  def : Pat<(v4i32 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>;
+  def : Pat<(v4i32 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>;
 
   // Store v4i32
-  def : Pat<(store v4i32:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(store v4i32:$XS, (PPCmatpcreladdr PCRelForm:$ga)),
             (PSTXVpc $XS, $ga, 0)>;
 
   // Load v2i64
-  def : Pat<(v2i64 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>;
+  def : Pat<(v2i64 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>;
 
   // Store v2i64
-  def : Pat<(store v2i64:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(store v2i64:$XS, (PPCmatpcreladdr PCRelForm:$ga)),
             (PSTXVpc $XS, $ga, 0)>;
 
   // Load v4f32
-  def : Pat<(v4f32 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>;
+  def : Pat<(v4f32 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>;
 
   // Store v4f32
-  def : Pat<(store v4f32:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(store v4f32:$XS, (PPCmatpcreladdr PCRelForm:$ga)),
             (PSTXVpc $XS, $ga, 0)>;
 
   // Load v2f64
-  def : Pat<(v2f64 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>;
+  def : Pat<(v2f64 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>;
 
   // Store v2f64
-  def : Pat<(store v2f64:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(store v2f64:$XS, (PPCmatpcreladdr PCRelForm:$ga)),
             (PSTXVpc $XS, $ga, 0)>;
 
   // Atomic Load
-  def : Pat<(atomic_load_8 (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(atomic_load_8 (PPCmatpcreladdr PCRelForm:$ga)),
             (PLBZpc $ga, 0)>;
-  def : Pat<(atomic_load_16 (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(atomic_load_16 (PPCmatpcreladdr PCRelForm:$ga)),
             (PLHZpc $ga, 0)>;
-  def : Pat<(atomic_load_32 (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(atomic_load_32 (PPCmatpcreladdr PCRelForm:$ga)),
             (PLWZpc $ga, 0)>;
-  def : Pat<(atomic_load_64 (PPCmatpcreladdr pcreladdr:$ga)),
+  def : Pat<(atomic_load_64 (PPCmatpcreladdr PCRelForm:$ga)),
             (PLDpc $ga, 0)>;
 
   // Atomic Store
-  def : Pat<(atomic_store_8 (PPCmatpcreladdr pcreladdr:$ga), i32:$RS),
+  def : Pat<(atomic_store_8 (PPCmatpcreladdr PCRelForm:$ga), i32:$RS),
             (PSTBpc $RS, $ga, 0)>;
-  def : Pat<(atomic_store_16 (PPCmatpcreladdr pcreladdr:$ga), i32:$RS),
+  def : Pat<(atomic_store_16 (PPCmatpcreladdr PCRelForm:$ga), i32:$RS),
             (PSTHpc $RS, $ga, 0)>;
-  def : Pat<(atomic_store_32 (PPCmatpcreladdr pcreladdr:$ga), i32:$RS),
+  def : Pat<(atomic_store_32 (PPCmatpcreladdr PCRelForm:$ga), i32:$RS),
             (PSTWpc $RS, $ga, 0)>;
-  def : Pat<(atomic_store_8 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS),
+  def : Pat<(atomic_store_8 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS),
             (PSTB8pc $RS, $ga, 0)>;
-  def : Pat<(atomic_store_16 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS),
+  def : Pat<(atomic_store_16 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS),
             (PSTH8pc $RS, $ga, 0)>;
-  def : Pat<(atomic_store_32 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS),
+  def : Pat<(atomic_store_32 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS),
             (PSTW8pc $RS, $ga, 0)>;
-  def : Pat<(atomic_store_64 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS),
+  def : Pat<(atomic_store_64 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS),
             (PSTDpc $RS, $ga, 0)>;
 
   // Special Cases For PPCstore_scal_int_from_vsr
   def : Pat<(PPCstore_scal_int_from_vsr
               (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)),
-              (PPCmatpcreladdr pcreladdr:$dst), 8),
+              (PPCmatpcreladdr PCRelForm:$dst), 8),
             (PSTXSDpc (XSCVDPSXDS f64:$src), $dst, 0)>;
   def : Pat<(PPCstore_scal_int_from_vsr
               (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)),
-              (PPCmatpcreladdr pcreladdr:$dst), 8),
+              (PPCmatpcreladdr PCRelForm:$dst), 8),
             (PSTXSDpc (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC), $dst, 0)>;
 
   def : Pat<(PPCstore_scal_int_from_vsr
               (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)),
-              (PPCmatpcreladdr pcreladdr:$dst), 8),
+              (PPCmatpcreladdr PCRelForm:$dst), 8),
             (PSTXSDpc (XSCVDPUXDS f64:$src), $dst, 0)>;
   def : Pat<(PPCstore_scal_int_from_vsr
               (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)),
-              (PPCmatpcreladdr pcreladdr:$dst), 8),
+              (PPCmatpcreladdr PCRelForm:$dst), 8),
             (PSTXSDpc (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), $dst, 0)>;
 
+  def : Pat<(v4f32 (PPCldvsxlh (PPCmatpcreladdr PCRelForm:$addr))),
+            (SUBREG_TO_REG (i64 1), (PLFDpc $addr, 0), sub_64)>;
+
   // If the PPCmatpcreladdr node is not caught by any other pattern it should be
   // caught here and turned into a paddi instruction to materialize the address.
-  def : Pat<(PPCmatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>;
+  def : Pat<(PPCmatpcreladdr PCRelForm:$addr), (PADDI8pc 0, $addr)>;
   // PPCtlsdynamatpcreladdr node is used for TLS dynamic models to materialize
   // tls global address with paddi instruction.
-  def : Pat<(PPCtlsdynamatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>;
+  def : Pat<(PPCtlsdynamatpcreladdr PCRelForm:$addr), (PADDI8pc 0, $addr)>;
   // PPCtlslocalexecmataddr node is used for TLS local exec models to
   // materialize tls global address with paddi instruction.
   def : Pat<(PPCaddTls i64:$in, (PPCtlslocalexecmataddr tglobaltlsaddr:$addr)),

diff  --git a/llvm/test/CodeGen/PowerPC/pcrel_ldst.ll b/llvm/test/CodeGen/PowerPC/pcrel_ldst.ll
index c34c451525bb0..797080557ecd9 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel_ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel_ldst.ll
@@ -42,6 +42,7 @@
 @GlobSt11 = dso_local local_unnamed_addr global [20 x <16 x i8>] zeroinitializer, align 16
 @GlobLd12 = dso_local local_unnamed_addr global [20 x <16 x i8>] zeroinitializer, align 16
 @GlobSt12 = dso_local local_unnamed_addr global [20 x <16 x i8>] zeroinitializer, align 16
+@GlobF128 = dso_local local_unnamed_addr global [20 x fp128] zeroinitializer, align 16
 
 ; Function Attrs: nofree norecurse nounwind uwtable willreturn
 define dso_local void @testGlob1PtrPlus0() {
@@ -2270,3 +2271,323 @@ entry:
   store <16 x i8> %0, <16 x i8>* %arrayidx1, align 16
   ret void
 }
+
+; Function Attrs: nofree norecurse nounwind uwtable willreturn
+define dso_local void @Atomic_LdSt_i8() {
+; CHECK-P10-LE-LABEL: Atomic_LdSt_i8:
+; CHECK-P10-LE:       # %bb.0: # %entry
+; CHECK-P10-LE-NEXT:    plbz r3, GlobLd1@PCREL(0), 1
+; CHECK-P10-LE-NEXT:    pstb r3, GlobSt1@PCREL(0), 1
+; CHECK-P10-LE-NEXT:    blr
+;
+; CHECK-P10-BE-LABEL: Atomic_LdSt_i8:
+; CHECK-P10-BE:       # %bb.0: # %entry
+; CHECK-P10-BE-NEXT:    addis r3, r2, GlobLd1@toc@ha
+; CHECK-P10-BE-NEXT:    addis r4, r2, GlobSt1@toc@ha
+; CHECK-P10-BE-NEXT:    lbz r3, GlobLd1@toc@l(r3)
+; CHECK-P10-BE-NEXT:    stb r3, GlobSt1@toc@l(r4)
+; CHECK-P10-BE-NEXT:    blr
+;
+; CHECK-LABEL: Atomic_LdSt_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis r3, r2, GlobLd1@toc@ha
+; CHECK-NEXT:    addis r4, r2, GlobSt1@toc@ha
+; CHECK-NEXT:    lbz r3, GlobLd1@toc@l(r3)
+; CHECK-NEXT:    stb r3, GlobSt1@toc@l(r4)
+; CHECK-NEXT:    blr
+entry:
+  %0 = load atomic i8, i8* getelementptr inbounds ([20 x i8], [20 x i8]* @GlobLd1, i64 0, i64 0) monotonic, align 1
+  store atomic i8 %0, i8* getelementptr inbounds ([20 x i8], [20 x i8]* @GlobSt1, i64 0, i64 0) monotonic, align 1
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind uwtable willreturn
+define dso_local void @Atomic_LdSt_i16() {
+; CHECK-P10-LE-LABEL: Atomic_LdSt_i16:
+; CHECK-P10-LE:       # %bb.0: # %entry
+; CHECK-P10-LE-NEXT:    plhz r3, GlobLd3@PCREL(0), 1
+; CHECK-P10-LE-NEXT:    psth r3, GlobSt3@PCREL(0), 1
+; CHECK-P10-LE-NEXT:    blr
+;
+; CHECK-P10-BE-LABEL: Atomic_LdSt_i16:
+; CHECK-P10-BE:       # %bb.0: # %entry
+; CHECK-P10-BE-NEXT:    addis r3, r2, GlobLd3@toc@ha
+; CHECK-P10-BE-NEXT:    addis r4, r2, GlobSt3@toc@ha
+; CHECK-P10-BE-NEXT:    lhz r3, GlobLd3@toc@l(r3)
+; CHECK-P10-BE-NEXT:    sth r3, GlobSt3@toc@l(r4)
+; CHECK-P10-BE-NEXT:    blr
+;
+; CHECK-LABEL: Atomic_LdSt_i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis r3, r2, GlobLd3@toc@ha
+; CHECK-NEXT:    addis r4, r2, GlobSt3@toc@ha
+; CHECK-NEXT:    lhz r3, GlobLd3@toc@l(r3)
+; CHECK-NEXT:    sth r3, GlobSt3@toc@l(r4)
+; CHECK-NEXT:    blr
+entry:
+  %0 = load atomic i16, i16* getelementptr inbounds ([20 x i16], [20 x i16]* @GlobLd3, i64 0, i64 0) monotonic, align 2
+  store atomic i16 %0, i16* getelementptr inbounds ([20 x i16], [20 x i16]* @GlobSt3, i64 0, i64 0) monotonic, align 2
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind uwtable willreturn
+define dso_local void @Atomic_LdSt_i32() {
+; CHECK-P10-LE-LABEL: Atomic_LdSt_i32:
+; CHECK-P10-LE:       # %bb.0: # %entry
+; CHECK-P10-LE-NEXT:    plwz r3, GlobLd5@PCREL(0), 1
+; CHECK-P10-LE-NEXT:    pstw r3, GlobSt5@PCREL(0), 1
+; CHECK-P10-LE-NEXT:    blr
+;
+; CHECK-P10-BE-LABEL: Atomic_LdSt_i32:
+; CHECK-P10-BE:       # %bb.0: # %entry
+; CHECK-P10-BE-NEXT:    addis r3, r2, GlobLd5@toc@ha
+; CHECK-P10-BE-NEXT:    addis r4, r2, GlobSt5@toc@ha
+; CHECK-P10-BE-NEXT:    lwz r3, GlobLd5@toc@l(r3)
+; CHECK-P10-BE-NEXT:    stw r3, GlobSt5@toc@l(r4)
+; CHECK-P10-BE-NEXT:    blr
+;
+; CHECK-LABEL: Atomic_LdSt_i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis r3, r2, GlobLd5@toc@ha
+; CHECK-NEXT:    addis r4, r2, GlobSt5@toc@ha
+; CHECK-NEXT:    lwz r3, GlobLd5@toc@l(r3)
+; CHECK-NEXT:    stw r3, GlobSt5@toc@l(r4)
+; CHECK-NEXT:    blr
+entry:
+  %0 = load atomic i32, i32* getelementptr inbounds ([20 x i32], [20 x i32]* @GlobLd5, i64 0, i64 0) monotonic, align 4
+  store atomic i32 %0, i32* getelementptr inbounds ([20 x i32], [20 x i32]* @GlobSt5, i64 0, i64 0) monotonic, align 4
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind uwtable willreturn
+define dso_local void @Atomic_LdSt_i64() {
+; CHECK-P10-LE-LABEL: Atomic_LdSt_i64:
+; CHECK-P10-LE:       # %bb.0: # %entry
+; CHECK-P10-LE-NEXT:    pld r3, GlobLd7@PCREL(0), 1
+; CHECK-P10-LE-NEXT:    pstd r3, GlobSt7@PCREL(0), 1
+; CHECK-P10-LE-NEXT:    blr
+;
+; CHECK-P10-BE-LABEL: Atomic_LdSt_i64:
+; CHECK-P10-BE:       # %bb.0: # %entry
+; CHECK-P10-BE-NEXT:    addis r3, r2, GlobLd7@toc@ha
+; CHECK-P10-BE-NEXT:    addis r4, r2, GlobSt7@toc@ha
+; CHECK-P10-BE-NEXT:    ld r3, GlobLd7@toc@l(r3)
+; CHECK-P10-BE-NEXT:    std r3, GlobSt7@toc@l(r4)
+; CHECK-P10-BE-NEXT:    blr
+;
+; CHECK-P9-LABEL: Atomic_LdSt_i64:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    addis r3, r2, GlobLd7@toc@ha
+; CHECK-P9-NEXT:    addis r4, r2, GlobSt7@toc@ha
+; CHECK-P9-NEXT:    ld r3, GlobLd7@toc@l(r3)
+; CHECK-P9-NEXT:    std r3, GlobSt7@toc@l(r4)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LABEL: Atomic_LdSt_i64:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    addis r3, r2, GlobLd7@toc@ha
+; CHECK-P8-NEXT:    ld r3, GlobLd7@toc@l(r3)
+; CHECK-P8-NEXT:    addis r4, r2, GlobSt7@toc@ha
+; CHECK-P8-NEXT:    std r3, GlobSt7@toc@l(r4)
+; CHECK-P8-NEXT:    blr
+entry:
+  %0 = load atomic i64, i64* getelementptr inbounds ([20 x i64], [20 x i64]* @GlobLd7, i64 0, i64 0) monotonic, align 8
+  store atomic i64 %0, i64* getelementptr inbounds ([20 x i64], [20 x i64]* @GlobSt7, i64 0, i64 0) monotonic, align 8
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind uwtable willreturn writeonly
+define dso_local void @store_double_f64_to_uint(double %str) local_unnamed_addr #0 {
+; CHECK-P10-LE-LABEL: store_double_f64_to_uint:
+; CHECK-P10-LE:       # %bb.0: # %entry
+; CHECK-P10-LE-NEXT:    xscvdpuxds v2, f1
+; CHECK-P10-LE-NEXT:    pstxsd v2, GlobSt10@PCREL(0), 1
+; CHECK-P10-LE-NEXT:    blr
+;
+; CHECK-P10-BE-LABEL: store_double_f64_to_uint:
+; CHECK-P10-BE:       # %bb.0: # %entry
+; CHECK-P10-BE-NEXT:    xscvdpuxds v2, f1
+; CHECK-P10-BE-NEXT:    addis r3, r2, GlobSt10@toc@ha
+; CHECK-P10-BE-NEXT:    addi r3, r3, GlobSt10@toc@l
+; CHECK-P10-BE-NEXT:    stxsd v2, 0(r3)
+; CHECK-P10-BE-NEXT:    blr
+;
+; CHECK-P9-LABEL: store_double_f64_to_uint:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xscvdpuxds v2, f1
+; CHECK-P9-NEXT:    addis r3, r2, GlobSt10@toc@ha
+; CHECK-P9-NEXT:    addi r3, r3, GlobSt10@toc@l
+; CHECK-P9-NEXT:    stxsd v2, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LABEL: store_double_f64_to_uint:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xscvdpuxds f0, f1
+; CHECK-P8-NEXT:    addis r3, r2, GlobSt10@toc@ha
+; CHECK-P8-NEXT:    addi r3, r3, GlobSt10@toc@l
+; CHECK-P8-NEXT:    stxsdx f0, 0, r3
+; CHECK-P8-NEXT:    blr
+entry:
+  %conv = fptoui double %str to i64
+  store i64 %conv, i64* bitcast ([20 x double]* @GlobSt10 to i64*), align 8
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind uwtable willreturn writeonly
+define dso_local void @store_double_f64_to_sint(double %str) local_unnamed_addr #0 {
+; CHECK-P10-LE-LABEL: store_double_f64_to_sint:
+; CHECK-P10-LE:       # %bb.0: # %entry
+; CHECK-P10-LE-NEXT:    xscvdpsxds v2, f1
+; CHECK-P10-LE-NEXT:    pstxsd v2, GlobSt10@PCREL(0), 1
+; CHECK-P10-LE-NEXT:    blr
+;
+; CHECK-P10-BE-LABEL: store_double_f64_to_sint:
+; CHECK-P10-BE:       # %bb.0: # %entry
+; CHECK-P10-BE-NEXT:    xscvdpsxds v2, f1
+; CHECK-P10-BE-NEXT:    addis r3, r2, GlobSt10@toc@ha
+; CHECK-P10-BE-NEXT:    addi r3, r3, GlobSt10@toc@l
+; CHECK-P10-BE-NEXT:    stxsd v2, 0(r3)
+; CHECK-P10-BE-NEXT:    blr
+;
+; CHECK-P9-LABEL: store_double_f64_to_sint:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xscvdpsxds v2, f1
+; CHECK-P9-NEXT:    addis r3, r2, GlobSt10@toc@ha
+; CHECK-P9-NEXT:    addi r3, r3, GlobSt10@toc@l
+; CHECK-P9-NEXT:    stxsd v2, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LABEL: store_double_f64_to_sint:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xscvdpsxds f0, f1
+; CHECK-P8-NEXT:    addis r3, r2, GlobSt10@toc@ha
+; CHECK-P8-NEXT:    addi r3, r3, GlobSt10@toc@l
+; CHECK-P8-NEXT:    stxsdx f0, 0, r3
+; CHECK-P8-NEXT:    blr
+entry:
+  %conv = fptosi double %str to i64
+  store i64 %conv, i64* bitcast ([20 x double]* @GlobSt10 to i64*), align 8
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind uwtable willreturn writeonly
+define dso_local void @store_f128_to_uint(fp128 %str) local_unnamed_addr #0 {
+; CHECK-P10-LE-LABEL: store_f128_to_uint:
+; CHECK-P10-LE:       # %bb.0: # %entry
+; CHECK-P10-LE-NEXT:    xscvqpudz v2, v2
+; CHECK-P10-LE-NEXT:    pstxsd v2, GlobF128@PCREL(0), 1
+; CHECK-P10-LE-NEXT:    blr
+;
+; CHECK-P10-BE-LABEL: store_f128_to_uint:
+; CHECK-P10-BE:       # %bb.0: # %entry
+; CHECK-P10-BE-NEXT:    xscvqpudz v2, v2
+; CHECK-P10-BE-NEXT:    addis r3, r2, GlobF128@toc@ha
+; CHECK-P10-BE-NEXT:    addi r3, r3, GlobF128@toc@l
+; CHECK-P10-BE-NEXT:    stxsd v2, 0(r3)
+; CHECK-P10-BE-NEXT:    blr
+;
+; CHECK-P9-LABEL: store_f128_to_uint:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xscvqpudz v2, v2
+; CHECK-P9-NEXT:    addis r3, r2, GlobF128@toc@ha
+; CHECK-P9-NEXT:    addi r3, r3, GlobF128@toc@l
+; CHECK-P9-NEXT:    stxsd v2, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LE-LABEL: store_f128_to_uint:
+; CHECK-P8-LE:       # %bb.0: # %entry
+; CHECK-P8-LE-NEXT:    mflr r0
+; CHECK-P8-LE-NEXT:    std r0, 16(r1)
+; CHECK-P8-LE-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-LE-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-P8-LE-NEXT:    .cfi_offset lr, 16
+; CHECK-P8-LE-NEXT:    bl __fixunskfdi
+; CHECK-P8-LE-NEXT:    nop
+; CHECK-P8-LE-NEXT:    addis r4, r2, GlobF128@toc@ha
+; CHECK-P8-LE-NEXT:    std r3, GlobF128@toc@l(r4)
+; CHECK-P8-LE-NEXT:    addi r1, r1, 32
+; CHECK-P8-LE-NEXT:    ld r0, 16(r1)
+; CHECK-P8-LE-NEXT:    mtlr r0
+; CHECK-P8-LE-NEXT:    blr
+;
+; CHECK-P8-BE-LABEL: store_f128_to_uint:
+; CHECK-P8-BE:       # %bb.0: # %entry
+; CHECK-P8-BE-NEXT:    mflr r0
+; CHECK-P8-BE-NEXT:    std r0, 16(r1)
+; CHECK-P8-BE-NEXT:    stdu r1, -112(r1)
+; CHECK-P8-BE-NEXT:    .cfi_def_cfa_offset 112
+; CHECK-P8-BE-NEXT:    .cfi_offset lr, 16
+; CHECK-P8-BE-NEXT:    bl __fixunskfdi
+; CHECK-P8-BE-NEXT:    nop
+; CHECK-P8-BE-NEXT:    addis r4, r2, GlobF128@toc@ha
+; CHECK-P8-BE-NEXT:    std r3, GlobF128@toc@l(r4)
+; CHECK-P8-BE-NEXT:    addi r1, r1, 112
+; CHECK-P8-BE-NEXT:    ld r0, 16(r1)
+; CHECK-P8-BE-NEXT:    mtlr r0
+; CHECK-P8-BE-NEXT:    blr
+entry:
+  %conv = fptoui fp128 %str to i64
+  store i64 %conv, i64* bitcast ([20 x fp128]* @GlobF128 to i64*), align 16
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind uwtable willreturn writeonly
+define dso_local void @store_f128_to_sint(fp128 %str) local_unnamed_addr #0 {
+; CHECK-P10-LE-LABEL: store_f128_to_sint:
+; CHECK-P10-LE:       # %bb.0: # %entry
+; CHECK-P10-LE-NEXT:    xscvqpsdz v2, v2
+; CHECK-P10-LE-NEXT:    pstxsd v2, GlobF128@PCREL(0), 1
+; CHECK-P10-LE-NEXT:    blr
+;
+; CHECK-P10-BE-LABEL: store_f128_to_sint:
+; CHECK-P10-BE:       # %bb.0: # %entry
+; CHECK-P10-BE-NEXT:    xscvqpsdz v2, v2
+; CHECK-P10-BE-NEXT:    addis r3, r2, GlobF128@toc@ha
+; CHECK-P10-BE-NEXT:    addi r3, r3, GlobF128@toc@l
+; CHECK-P10-BE-NEXT:    stxsd v2, 0(r3)
+; CHECK-P10-BE-NEXT:    blr
+;
+; CHECK-P9-LABEL: store_f128_to_sint:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xscvqpsdz v2, v2
+; CHECK-P9-NEXT:    addis r3, r2, GlobF128@toc@ha
+; CHECK-P9-NEXT:    addi r3, r3, GlobF128@toc@l
+; CHECK-P9-NEXT:    stxsd v2, 0(r3)
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LE-LABEL: store_f128_to_sint:
+; CHECK-P8-LE:       # %bb.0: # %entry
+; CHECK-P8-LE-NEXT:    mflr r0
+; CHECK-P8-LE-NEXT:    std r0, 16(r1)
+; CHECK-P8-LE-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-LE-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-P8-LE-NEXT:    .cfi_offset lr, 16
+; CHECK-P8-LE-NEXT:    bl __fixkfdi
+; CHECK-P8-LE-NEXT:    nop
+; CHECK-P8-LE-NEXT:    addis r4, r2, GlobF128@toc@ha
+; CHECK-P8-LE-NEXT:    std r3, GlobF128@toc@l(r4)
+; CHECK-P8-LE-NEXT:    addi r1, r1, 32
+; CHECK-P8-LE-NEXT:    ld r0, 16(r1)
+; CHECK-P8-LE-NEXT:    mtlr r0
+; CHECK-P8-LE-NEXT:    blr
+;
+; CHECK-P8-BE-LABEL: store_f128_to_sint:
+; CHECK-P8-BE:       # %bb.0: # %entry
+; CHECK-P8-BE-NEXT:    mflr r0
+; CHECK-P8-BE-NEXT:    std r0, 16(r1)
+; CHECK-P8-BE-NEXT:    stdu r1, -112(r1)
+; CHECK-P8-BE-NEXT:    .cfi_def_cfa_offset 112
+; CHECK-P8-BE-NEXT:    .cfi_offset lr, 16
+; CHECK-P8-BE-NEXT:    bl __fixkfdi
+; CHECK-P8-BE-NEXT:    nop
+; CHECK-P8-BE-NEXT:    addis r4, r2, GlobF128@toc@ha
+; CHECK-P8-BE-NEXT:    std r3, GlobF128@toc@l(r4)
+; CHECK-P8-BE-NEXT:    addi r1, r1, 112
+; CHECK-P8-BE-NEXT:    ld r0, 16(r1)
+; CHECK-P8-BE-NEXT:    mtlr r0
+; CHECK-P8-BE-NEXT:    blr
+entry:
+  %conv = fptosi fp128 %str to i64
+  store i64 %conv, i64* bitcast ([20 x fp128]* @GlobF128 to i64*), align 16
+  ret void
+}

diff  --git a/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll b/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
index cbc8a93677274..15dc6bc80dba8 100644
--- a/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
+++ b/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
@@ -5,6 +5,12 @@
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \
 ; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names \
 ; RUN:     -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \
+; RUN:     -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:     < %s | FileCheck %s --check-prefixes=CHECK,CHECK-P10
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \
+; RUN:     -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:     < %s | FileCheck %s --check-prefixes=CHECK,CHECK-P10-BE
 
 ; Function Attrs: norecurse nounwind readonly
 define dso_local <2 x double> @test1(<2 x float>* nocapture readonly %Ptr) {
@@ -75,3 +81,30 @@ entry:
   %2 = fpext <2 x float> %sub to <2 x double>
   ret <2 x double> %2
 }
+
+@G = dso_local local_unnamed_addr global <2 x float> <float 3.000000e+00, float 0x3FF3333340000000>, align 8
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind readonly uwtable willreturn
+define dso_local <2 x double> @test5(<2 x double> %a) {
+; CHECK-P10-LABEL: test5:
+; CHECK-P10:       # %bb.0: # %entry
+; CHECK-P10-NEXT:    plfd f0, G@PCREL(0), 1
+; CHECK-P10-NEXT:    xxmrghw vs0, vs0, vs0
+; CHECK-P10-NEXT:    xvcvspdp vs0, vs0
+; CHECK-P10-NEXT:    xvadddp v2, vs0, v2
+; CHECK-P10-NEXT:    blr
+;
+; CHECK-P10-BE-LABEL: test5:
+; CHECK-P10-BE:       # %bb.0: # %entry
+; CHECK-P10-BE-NEXT:    addis r3, r2, G@toc@ha
+; CHECK-P10-BE-NEXT:    lfd f0, G@toc@l(r3)
+; CHECK-P10-BE-NEXT:    xxmrghw vs0, vs0, vs0
+; CHECK-P10-BE-NEXT:    xvcvspdp vs0, vs0
+; CHECK-P10-BE-NEXT:    xvadddp v2, vs0, v2
+; CHECK-P10-BE-NEXT:    blr
+entry:
+  %0 = load <2 x float>, <2 x float>* @G, align 8
+  %1 = fpext <2 x float> %0 to <2 x double>
+  %add = fadd <2 x double> %1, %a
+  ret <2 x double> %add
+}


        


More information about the llvm-commits mailing list