[llvm] [PowerPC] Optimize BUILD_VECTOR from load and zeros (PR #73609)

Kai Luo via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 29 21:55:34 PST 2023


https://github.com/bzEq updated https://github.com/llvm/llvm-project/pull/73609

>From fe5c6b5cb324774e79863ac898d1acecbe2310b4 Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Tue, 28 Nov 2023 03:18:08 +0000
Subject: [PATCH] Optimize BUILD_VECTOR

---
 llvm/lib/Target/PowerPC/PPCInstrVSX.td        | 136 ++++++++++
 .../build-vector-from-load-and-zeros.ll       | 234 ++++--------------
 llvm/test/CodeGen/PowerPC/vec-promote.ll      |  59 ++---
 3 files changed, 202 insertions(+), 227 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 0e5f6b773bb5441..c479bca70fef2fd 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2431,6 +2431,47 @@ def DblwdCmp {
                                (v2i64 (XXSPLTW EQWSHAND, 2)), 0));
 }
 
+class SplatAndAssignIndexed<
+      SDPatternOperator op,
+      int Total, dag splat,
+      int Index, dag assign> {
+  defvar head = !listsplat(splat, Index);
+  defvar x = [assign];
+  defvar tail = !listsplat(splat, !sub(!sub(Total, Index), 1));
+  list<dag> Ops = !listconcat(head, x, tail);
+  dag DAG = !foldl((op), Ops, a, b, !con(a, (op b)));
+}
+
+class BVExtLoadAndZerosFP<int Index> : SplatAndAssignIndexed<
+    build_vector,
+    2, (f64 fpimm0),
+    Index, (f64 (extloadf32 ForceXForm:$src))>;
+
+class BVZExtLoadAndZerosInt<int Index> : SplatAndAssignIndexed<
+    build_vector,
+    2, (i64 0),
+    Index, (i64 (zextloadi32 ForceXForm:$src))>;
+
+class BVLoadAndZerosInt<int Index> : SplatAndAssignIndexed<
+    build_vector,
+    4, (i32 0),
+    Index, (i32 (load ForceXForm:$src))>;
+
+class BVLoadAndZerosFP<int Index> : SplatAndAssignIndexed<
+    build_vector,
+    4, (f32 fpimm0),
+    Index, (f32 (load ForceXForm:$src))>;
+
+class BVLoadAndZerosDbl<int Index> : SplatAndAssignIndexed<
+    build_vector,
+    2, (f64 fpimm0),
+    Index, (f64 (load ForceXForm:$src))>;
+
+class BVLoadAndZerosLong<int Index> : SplatAndAssignIndexed<
+    build_vector,
+    2, (i64 0),
+    Index, (i64 (load ForceXForm:$src))>;
+
 //---------------------------- Anonymous Patterns ----------------------------//
 // Predicate combinations are kept in roughly chronological order in terms of
 // instruction availability in the architecture. For example, VSX came in with
@@ -3449,6 +3490,53 @@ foreach Idx = [ [0,3], [2,1], [3,2] ] in {
             (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
                                    sub_64), ForceXForm:$src)>;
 }
+
+// BUILD_VECTOR via single load and zeros.
+// Extension load.
+def : Pat<(v2f64 BVExtLoadAndZerosFP<0>.DAG),
+          (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
+def : Pat<(v2f64 BVExtLoadAndZerosFP<1>.DAG),
+          (v2f64 (XXPERMDIs
+                 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC), 2))>;
+
+def : Pat<(v2i64 BVZExtLoadAndZerosInt<0>.DAG),
+          (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+def : Pat<(v2i64 BVZExtLoadAndZerosInt<1>.DAG),
+          (v2i64 (XXPERMDIs
+                 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC), 2))>;
+
+// Normal load.
+foreach Index = !range(4) in {
+  defvar Temp = !sub(5, Index);
+  defvar Offset = !if(!gt(Temp, 3), !sub(Temp, 4), Temp);
+  if !ne(Offset, 0) then {
+    def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
+              (v4i32 (XXSLDWIs
+                     (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
+                     Offset))>;
+    def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
+              (v4f32 (XXSLDWIs
+                     (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
+                     Offset))>;
+  } else {
+    def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
+              (v4i32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+    def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
+              (v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+  }
+}
+
+def : Pat<(v2f64 BVLoadAndZerosDbl<0>.DAG),
+          (v2f64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
+def : Pat<(v2f64 BVLoadAndZerosDbl<1>.DAG),
+          (v2f64 (XXPERMDIs
+                 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
+
+def : Pat<(v2i64 BVLoadAndZerosLong<0>.DAG),
+          (v2i64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
+def : Pat<(v2i64 BVLoadAndZerosLong<1>.DAG),
+          (v2i64 (XXPERMDIs
+                 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
 } // HasVSX, HasP8Vector, IsBigEndian, IsPPC64
 
 // Little endian Power8 VSX subtarget.
@@ -3542,6 +3630,54 @@ foreach Idx = [ [0,2], [1,1], [3,3] ] in {
             (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
                                    sub_64), ForceXForm:$src)>;
 }
+
+// BUILD_VECTOR via single load and zeros.
+// Extension load.
+def : Pat<(v2f64 BVExtLoadAndZerosFP<1>.DAG),
+          (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
+def : Pat<(v2f64 BVExtLoadAndZerosFP<0>.DAG),
+          (v2f64 (XXPERMDIs
+                 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC), 2))>;
+
+def : Pat<(v2i64 BVZExtLoadAndZerosInt<1>.DAG),
+          (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+def : Pat<(v2i64 BVZExtLoadAndZerosInt<0>.DAG),
+          (v2i64 (XXPERMDIs
+                 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC), 2))>;
+
+// Normal load.
+foreach Index = !range(4) in {
+  defvar Temp = !sub(!add(Index, 4), 2);
+  defvar Offset = !if(!gt(Temp, 3), !sub(Temp, 4), Temp);
+  if !ne(Offset, 0) then {
+    def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
+              (v4i32 (XXSLDWIs
+                     (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
+                     Offset))>;
+    def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
+              (v4f32 (XXSLDWIs
+                     (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
+                     Offset))>;
+  } else {
+    def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
+              (v4i32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+    def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
+              (v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+  }
+}
+
+def : Pat<(v2f64 BVLoadAndZerosDbl<1>.DAG),
+          (v2f64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
+def : Pat<(v2f64 BVLoadAndZerosDbl<0>.DAG),
+          (v2f64 (XXPERMDIs
+                 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
+
+def : Pat<(v2i64 BVLoadAndZerosLong<1>.DAG),
+          (v2i64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
+def : Pat<(v2i64 BVLoadAndZerosLong<0>.DAG),
+          (v2i64 (XXPERMDIs
+                 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
+
 } // HasVSX, HasP8Vector, IsLittleEndian
 
 // Big endian pre-Power9 VSX subtarget.
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
index cc32a76b22c2872..a2f65ad75972e69 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
@@ -17,11 +17,7 @@ define  <2 x i64> @build_v2i64_extload_0(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v2i64_extload_0:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    lwz 3, 0(3)
-; PWR8-BE-NEXT:    li 4, 0
-; PWR8-BE-NEXT:    mtfprd 0, 4
-; PWR8-BE-NEXT:    mtfprd 1, 3
-; PWR8-BE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-BE-NEXT:    lxsiwzx 34, 0, 3
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v2i64_extload_0:
@@ -45,13 +41,8 @@ define  <2 x i64> @build_v2i64_extload_0(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v2i64_extload_0:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    lwz 3, 0(3)
-; PWR8-LE-NEXT:    li 4, 0
-; PWR8-LE-NEXT:    rldimi 3, 4, 32, 0
-; PWR8-LE-NEXT:    rldimi 4, 4, 32, 0
-; PWR8-LE-NEXT:    mtfprd 0, 3
-; PWR8-LE-NEXT:    mtfprd 1, 4
-; PWR8-LE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-LE-NEXT:    lxsiwzx 0, 0, 3
+; PWR8-LE-NEXT:    xxsldwi 34, 0, 0, 2
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load i32, ptr %p, align 4
@@ -73,11 +64,8 @@ define  <2 x i64> @build_v2i64_extload_1(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v2i64_extload_1:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    lwz 3, 0(3)
-; PWR8-BE-NEXT:    li 4, 0
-; PWR8-BE-NEXT:    mtfprd 0, 4
-; PWR8-BE-NEXT:    mtfprd 1, 3
-; PWR8-BE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-BE-NEXT:    lxsiwzx 0, 0, 3
+; PWR8-BE-NEXT:    xxswapd 34, 0
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v2i64_extload_1:
@@ -93,11 +81,7 @@ define  <2 x i64> @build_v2i64_extload_1(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v2i64_extload_1:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    lwz 3, 0(3)
-; PWR8-LE-NEXT:    li 4, 0
-; PWR8-LE-NEXT:    mtfprd 0, 4
-; PWR8-LE-NEXT:    mtfprd 1, 3
-; PWR8-LE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-LE-NEXT:    lxsiwzx 34, 0, 3
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load i32, ptr %p, align 4
@@ -116,9 +100,7 @@ define <2 x double> @build_v2f64_extload_0(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v2f64_extload_0:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    lfs 0, 0(3)
-; PWR8-BE-NEXT:    xxlxor 1, 1, 1
-; PWR8-BE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-BE-NEXT:    lxsspx 34, 0, 3
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v2f64_extload_0:
@@ -130,9 +112,8 @@ define <2 x double> @build_v2f64_extload_0(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v2f64_extload_0:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    lfs 0, 0(3)
-; PWR8-LE-NEXT:    xxlxor 1, 1, 1
-; PWR8-LE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-LE-NEXT:    lxsspx 0, 0, 3
+; PWR8-LE-NEXT:    xxswapd 34, 0
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load float, ptr %p, align 4
@@ -151,9 +132,8 @@ define <2 x double> @build_v2f64_extload_1(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v2f64_extload_1:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    lfs 0, 0(3)
-; PWR8-BE-NEXT:    xxlxor 1, 1, 1
-; PWR8-BE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-BE-NEXT:    lxsspx 0, 0, 3
+; PWR8-BE-NEXT:    xxswapd 34, 0
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v2f64_extload_1:
@@ -165,9 +145,7 @@ define <2 x double> @build_v2f64_extload_1(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v2f64_extload_1:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    lfs 0, 0(3)
-; PWR8-LE-NEXT:    xxlxor 1, 1, 1
-; PWR8-LE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-LE-NEXT:    lxsspx 34, 0, 3
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load float, ptr %p, align 4
@@ -186,9 +164,7 @@ define <2 x double> @build_v2f64_load_0(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v2f64_load_0:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    lfd 0, 0(3)
-; PWR8-BE-NEXT:    xxlxor 1, 1, 1
-; PWR8-BE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-BE-NEXT:    lxsdx 34, 0, 3
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v2f64_load_0:
@@ -200,9 +176,8 @@ define <2 x double> @build_v2f64_load_0(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v2f64_load_0:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    lfd 0, 0(3)
-; PWR8-LE-NEXT:    xxlxor 1, 1, 1
-; PWR8-LE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-LE-NEXT:    lxsdx 0, 0, 3
+; PWR8-LE-NEXT:    xxswapd 34, 0
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load double, ptr %p, align 8
@@ -220,9 +195,8 @@ define <2 x double> @build_v2f64_load_1(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v2f64_load_1:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    lfd 0, 0(3)
-; PWR8-BE-NEXT:    xxlxor 1, 1, 1
-; PWR8-BE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-BE-NEXT:    lxsdx 0, 0, 3
+; PWR8-BE-NEXT:    xxswapd 34, 0
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v2f64_load_1:
@@ -234,9 +208,7 @@ define <2 x double> @build_v2f64_load_1(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v2f64_load_1:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    lfd 0, 0(3)
-; PWR8-LE-NEXT:    xxlxor 1, 1, 1
-; PWR8-LE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-LE-NEXT:    lxsdx 34, 0, 3
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load double, ptr %p, align 8
@@ -257,11 +229,7 @@ define <2 x i64> @build_v2i64_load_0(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v2i64_load_0:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    ld 3, 0(3)
-; PWR8-BE-NEXT:    li 4, 0
-; PWR8-BE-NEXT:    mtfprd 0, 4
-; PWR8-BE-NEXT:    mtfprd 1, 3
-; PWR8-BE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-BE-NEXT:    lxsdx 34, 0, 3
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v2i64_load_0:
@@ -277,11 +245,8 @@ define <2 x i64> @build_v2i64_load_0(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v2i64_load_0:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    ld 3, 0(3)
-; PWR8-LE-NEXT:    li 4, 0
-; PWR8-LE-NEXT:    mtfprd 0, 4
-; PWR8-LE-NEXT:    mtfprd 1, 3
-; PWR8-LE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-LE-NEXT:    lxsdx 0, 0, 3
+; PWR8-LE-NEXT:    xxswapd 34, 0
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load i64, ptr %p, align 8
@@ -302,11 +267,8 @@ define <2 x i64> @build_v2i64_load_1(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v2i64_load_1:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    ld 3, 0(3)
-; PWR8-BE-NEXT:    li 4, 0
-; PWR8-BE-NEXT:    mtfprd 0, 4
-; PWR8-BE-NEXT:    mtfprd 1, 3
-; PWR8-BE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-BE-NEXT:    lxsdx 0, 0, 3
+; PWR8-BE-NEXT:    xxswapd 34, 0
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v2i64_load_1:
@@ -322,11 +284,7 @@ define <2 x i64> @build_v2i64_load_1(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v2i64_load_1:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    ld 3, 0(3)
-; PWR8-LE-NEXT:    li 4, 0
-; PWR8-LE-NEXT:    mtfprd 0, 4
-; PWR8-LE-NEXT:    mtfprd 1, 3
-; PWR8-LE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-LE-NEXT:    lxsdx 34, 0, 3
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load i64, ptr %p, align 8
@@ -353,14 +311,8 @@ define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v4i32_load_0:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    lwz 3, 0(3)
-; PWR8-BE-NEXT:    li 4, 0
-; PWR8-BE-NEXT:    li 5, 0
-; PWR8-BE-NEXT:    rldimi 4, 4, 32, 0
-; PWR8-BE-NEXT:    rldimi 5, 3, 32, 0
-; PWR8-BE-NEXT:    mtfprd 1, 4
-; PWR8-BE-NEXT:    mtfprd 0, 5
-; PWR8-BE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-BE-NEXT:    lxsiwzx 0, 0, 3
+; PWR8-BE-NEXT:    xxsldwi 34, 0, 0, 1
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v4i32_load_0:
@@ -384,13 +336,8 @@ define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v4i32_load_0:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    lwz 3, 0(3)
-; PWR8-LE-NEXT:    li 4, 0
-; PWR8-LE-NEXT:    rldimi 3, 4, 32, 0
-; PWR8-LE-NEXT:    rldimi 4, 4, 32, 0
-; PWR8-LE-NEXT:    mtfprd 0, 3
-; PWR8-LE-NEXT:    mtfprd 1, 4
-; PWR8-LE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-LE-NEXT:    lxsiwzx 0, 0, 3
+; PWR8-LE-NEXT:    xxsldwi 34, 0, 0, 2
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load i32, ptr %p, align 4
@@ -417,13 +364,7 @@ define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v4i32_load_1:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    lwz 3, 0(3)
-; PWR8-BE-NEXT:    li 4, 0
-; PWR8-BE-NEXT:    rldimi 3, 4, 32, 0
-; PWR8-BE-NEXT:    rldimi 4, 4, 32, 0
-; PWR8-BE-NEXT:    mtfprd 0, 3
-; PWR8-BE-NEXT:    mtfprd 1, 4
-; PWR8-BE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-BE-NEXT:    lxsiwzx 34, 0, 3
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v4i32_load_1:
@@ -447,14 +388,8 @@ define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v4i32_load_1:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    lwz 3, 0(3)
-; PWR8-LE-NEXT:    li 4, 0
-; PWR8-LE-NEXT:    li 5, 0
-; PWR8-LE-NEXT:    rldimi 4, 4, 32, 0
-; PWR8-LE-NEXT:    rldimi 5, 3, 32, 0
-; PWR8-LE-NEXT:    mtfprd 1, 4
-; PWR8-LE-NEXT:    mtfprd 0, 5
-; PWR8-LE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-LE-NEXT:    lxsiwzx 0, 0, 3
+; PWR8-LE-NEXT:    xxsldwi 34, 0, 0, 3
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load i32, ptr %p, align 4
@@ -481,14 +416,8 @@ define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v4i32_load_2:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    lwz 3, 0(3)
-; PWR8-BE-NEXT:    li 4, 0
-; PWR8-BE-NEXT:    li 5, 0
-; PWR8-BE-NEXT:    rldimi 4, 4, 32, 0
-; PWR8-BE-NEXT:    rldimi 5, 3, 32, 0
-; PWR8-BE-NEXT:    mtfprd 1, 4
-; PWR8-BE-NEXT:    mtfprd 0, 5
-; PWR8-BE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-BE-NEXT:    lxsiwzx 0, 0, 3
+; PWR8-BE-NEXT:    xxsldwi 34, 0, 0, 3
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v4i32_load_2:
@@ -512,13 +441,7 @@ define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v4i32_load_2:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    lwz 3, 0(3)
-; PWR8-LE-NEXT:    li 4, 0
-; PWR8-LE-NEXT:    rldimi 3, 4, 32, 0
-; PWR8-LE-NEXT:    rldimi 4, 4, 32, 0
-; PWR8-LE-NEXT:    mtfprd 0, 3
-; PWR8-LE-NEXT:    mtfprd 1, 4
-; PWR8-LE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-LE-NEXT:    lxsiwzx 34, 0, 3
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load i32, ptr %p, align 4
@@ -545,13 +468,8 @@ define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v4i32_load_3:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    lwz 3, 0(3)
-; PWR8-BE-NEXT:    li 4, 0
-; PWR8-BE-NEXT:    rldimi 3, 4, 32, 0
-; PWR8-BE-NEXT:    rldimi 4, 4, 32, 0
-; PWR8-BE-NEXT:    mtfprd 0, 3
-; PWR8-BE-NEXT:    mtfprd 1, 4
-; PWR8-BE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-BE-NEXT:    lxsiwzx 0, 0, 3
+; PWR8-BE-NEXT:    xxsldwi 34, 0, 0, 2
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v4i32_load_3:
@@ -575,14 +493,8 @@ define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v4i32_load_3:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    lwz 3, 0(3)
-; PWR8-LE-NEXT:    li 4, 0
-; PWR8-LE-NEXT:    li 5, 0
-; PWR8-LE-NEXT:    rldimi 4, 4, 32, 0
-; PWR8-LE-NEXT:    rldimi 5, 3, 32, 0
-; PWR8-LE-NEXT:    mtfprd 1, 4
-; PWR8-LE-NEXT:    mtfprd 0, 5
-; PWR8-LE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-LE-NEXT:    lxsiwzx 0, 0, 3
+; PWR8-LE-NEXT:    xxsldwi 34, 0, 0, 1
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load i32, ptr %p, align 4
@@ -609,13 +521,8 @@ define <4 x float> @build_v4f32_load_0(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v4f32_load_0:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    lfs 0, 0(3)
-; PWR8-BE-NEXT:    xxlxor 1, 1, 1
-; PWR8-BE-NEXT:    xxmrghd 0, 0, 1
-; PWR8-BE-NEXT:    xxspltd 1, 1, 0
-; PWR8-BE-NEXT:    xvcvdpsp 34, 0
-; PWR8-BE-NEXT:    xvcvdpsp 35, 1
-; PWR8-BE-NEXT:    vmrgew 2, 2, 3
+; PWR8-BE-NEXT:    lxsiwzx 0, 0, 3
+; PWR8-BE-NEXT:    xxsldwi 34, 0, 0, 1
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v4f32_load_0:
@@ -639,13 +546,8 @@ define <4 x float> @build_v4f32_load_0(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v4f32_load_0:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    lfs 0, 0(3)
-; PWR8-LE-NEXT:    xxlxor 1, 1, 1
-; PWR8-LE-NEXT:    xxmrghd 0, 1, 0
-; PWR8-LE-NEXT:    xxspltd 1, 1, 0
-; PWR8-LE-NEXT:    xvcvdpsp 34, 0
-; PWR8-LE-NEXT:    xvcvdpsp 35, 1
-; PWR8-LE-NEXT:    vmrgew 2, 3, 2
+; PWR8-LE-NEXT:    lxsiwzx 0, 0, 3
+; PWR8-LE-NEXT:    xxsldwi 34, 0, 0, 2
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load float, ptr %p, align 4
@@ -672,13 +574,7 @@ define <4 x float> @build_v4f32_load_1(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v4f32_load_1:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    lfs 0, 0(3)
-; PWR8-BE-NEXT:    xxlxor 1, 1, 1
-; PWR8-BE-NEXT:    xxmrghd 0, 0, 1
-; PWR8-BE-NEXT:    xxspltd 1, 1, 0
-; PWR8-BE-NEXT:    xvcvdpsp 34, 0
-; PWR8-BE-NEXT:    xvcvdpsp 35, 1
-; PWR8-BE-NEXT:    vmrgew 2, 3, 2
+; PWR8-BE-NEXT:    lxsiwzx 34, 0, 3
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v4f32_load_1:
@@ -702,13 +598,8 @@ define <4 x float> @build_v4f32_load_1(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v4f32_load_1:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    lfs 0, 0(3)
-; PWR8-LE-NEXT:    xxlxor 1, 1, 1
-; PWR8-LE-NEXT:    xxmrghd 0, 1, 0
-; PWR8-LE-NEXT:    xxspltd 1, 1, 0
-; PWR8-LE-NEXT:    xvcvdpsp 34, 0
-; PWR8-LE-NEXT:    xvcvdpsp 35, 1
-; PWR8-LE-NEXT:    vmrgew 2, 2, 3
+; PWR8-LE-NEXT:    lxsiwzx 0, 0, 3
+; PWR8-LE-NEXT:    xxsldwi 34, 0, 0, 3
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load float, ptr %p, align 4
@@ -735,13 +626,8 @@ define <4 x float> @build_v4f32_load_2(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v4f32_load_2:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    lfs 0, 0(3)
-; PWR8-BE-NEXT:    xxlxor 1, 1, 1
-; PWR8-BE-NEXT:    xxmrghd 0, 1, 0
-; PWR8-BE-NEXT:    xxspltd 1, 1, 0
-; PWR8-BE-NEXT:    xvcvdpsp 34, 0
-; PWR8-BE-NEXT:    xvcvdpsp 35, 1
-; PWR8-BE-NEXT:    vmrgew 2, 2, 3
+; PWR8-BE-NEXT:    lxsiwzx 0, 0, 3
+; PWR8-BE-NEXT:    xxsldwi 34, 0, 0, 3
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v4f32_load_2:
@@ -765,13 +651,7 @@ define <4 x float> @build_v4f32_load_2(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v4f32_load_2:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    lfs 0, 0(3)
-; PWR8-LE-NEXT:    xxlxor 1, 1, 1
-; PWR8-LE-NEXT:    xxmrghd 0, 0, 1
-; PWR8-LE-NEXT:    xxspltd 1, 1, 0
-; PWR8-LE-NEXT:    xvcvdpsp 34, 0
-; PWR8-LE-NEXT:    xvcvdpsp 35, 1
-; PWR8-LE-NEXT:    vmrgew 2, 3, 2
+; PWR8-LE-NEXT:    lxsiwzx 34, 0, 3
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load float, ptr %p, align 4
@@ -798,13 +678,8 @@ define <4 x float> @build_v4f32_load_3(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v4f32_load_3:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    lfs 0, 0(3)
-; PWR8-BE-NEXT:    xxlxor 1, 1, 1
-; PWR8-BE-NEXT:    xxmrghd 0, 1, 0
-; PWR8-BE-NEXT:    xxspltd 1, 1, 0
-; PWR8-BE-NEXT:    xvcvdpsp 34, 0
-; PWR8-BE-NEXT:    xvcvdpsp 35, 1
-; PWR8-BE-NEXT:    vmrgew 2, 3, 2
+; PWR8-BE-NEXT:    lxsiwzx 0, 0, 3
+; PWR8-BE-NEXT:    xxsldwi 34, 0, 0, 2
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v4f32_load_3:
@@ -828,13 +703,8 @@ define <4 x float> @build_v4f32_load_3(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v4f32_load_3:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    lfs 0, 0(3)
-; PWR8-LE-NEXT:    xxlxor 1, 1, 1
-; PWR8-LE-NEXT:    xxmrghd 0, 0, 1
-; PWR8-LE-NEXT:    xxspltd 1, 1, 0
-; PWR8-LE-NEXT:    xvcvdpsp 34, 0
-; PWR8-LE-NEXT:    xvcvdpsp 35, 1
-; PWR8-LE-NEXT:    vmrgew 2, 2, 3
+; PWR8-LE-NEXT:    lxsiwzx 0, 0, 3
+; PWR8-LE-NEXT:    xxsldwi 34, 0, 0, 1
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load float, ptr %p, align 4
diff --git a/llvm/test/CodeGen/PowerPC/vec-promote.ll b/llvm/test/CodeGen/PowerPC/vec-promote.ll
index 628c5101c079652..1715532f07792ff 100644
--- a/llvm/test/CodeGen/PowerPC/vec-promote.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-promote.ll
@@ -7,16 +7,13 @@
 define noundef <2 x double> @vec_promote_double_zeroed(ptr nocapture noundef readonly %p) {
 ; CHECK-BE-LABEL: vec_promote_double_zeroed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lfd 0, 0(3)
-; CHECK-BE-NEXT:    xxlxor 1, 1, 1
-; CHECK-BE-NEXT:    xxmrghd 34, 0, 1
+; CHECK-BE-NEXT:    lxsdx 34, 0, 3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: vec_promote_double_zeroed:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    lfd 0, 0(3)
-; CHECK-LE-NEXT:    xxlxor 1, 1, 1
-; CHECK-LE-NEXT:    xxmrghd 34, 1, 0
+; CHECK-LE-NEXT:    lxsdx 0, 0, 3
+; CHECK-LE-NEXT:    xxswapd 34, 0
 ; CHECK-LE-NEXT:    blr
 entry:
   %0 = load double, ptr %p, align 8
@@ -43,24 +40,14 @@ entry:
 define noundef <4 x float> @vec_promote_float_zeroed(ptr nocapture noundef readonly %p) {
 ; CHECK-BE-LABEL: vec_promote_float_zeroed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lfs 0, 0(3)
-; CHECK-BE-NEXT:    xxlxor 1, 1, 1
-; CHECK-BE-NEXT:    xxmrghd 0, 0, 1
-; CHECK-BE-NEXT:    xxspltd 1, 1, 0
-; CHECK-BE-NEXT:    xvcvdpsp 34, 0
-; CHECK-BE-NEXT:    xvcvdpsp 35, 1
-; CHECK-BE-NEXT:    vmrgew 2, 2, 3
+; CHECK-BE-NEXT:    lxsiwzx 0, 0, 3
+; CHECK-BE-NEXT:    xxsldwi 34, 0, 0, 1
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: vec_promote_float_zeroed:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    lfs 0, 0(3)
-; CHECK-LE-NEXT:    xxlxor 1, 1, 1
-; CHECK-LE-NEXT:    xxmrghd 0, 1, 0
-; CHECK-LE-NEXT:    xxspltd 1, 1, 0
-; CHECK-LE-NEXT:    xvcvdpsp 34, 0
-; CHECK-LE-NEXT:    xvcvdpsp 35, 1
-; CHECK-LE-NEXT:    vmrgew 2, 3, 2
+; CHECK-LE-NEXT:    lxsiwzx 0, 0, 3
+; CHECK-LE-NEXT:    xxsldwi 34, 0, 0, 2
 ; CHECK-LE-NEXT:    blr
 entry:
   %0 = load float, ptr %p, align 8
@@ -89,20 +76,13 @@ entry:
 define noundef <2 x i64> @vec_promote_long_long_zeroed(ptr nocapture noundef readonly %p) {
 ; CHECK-BE-LABEL: vec_promote_long_long_zeroed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    ld 3, 0(3)
-; CHECK-BE-NEXT:    li 4, 0
-; CHECK-BE-NEXT:    mtfprd 0, 4
-; CHECK-BE-NEXT:    mtfprd 1, 3
-; CHECK-BE-NEXT:    xxmrghd 34, 1, 0
+; CHECK-BE-NEXT:    lxsdx 34, 0, 3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: vec_promote_long_long_zeroed:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    ld 3, 0(3)
-; CHECK-LE-NEXT:    li 4, 0
-; CHECK-LE-NEXT:    mtfprd 0, 4
-; CHECK-LE-NEXT:    mtfprd 1, 3
-; CHECK-LE-NEXT:    xxmrghd 34, 0, 1
+; CHECK-LE-NEXT:    lxsdx 0, 0, 3
+; CHECK-LE-NEXT:    xxswapd 34, 0
 ; CHECK-LE-NEXT:    blr
 entry:
   %0 = load i64, ptr %p, align 8
@@ -129,25 +109,14 @@ entry:
 define noundef <4 x i32> @vec_promote_int_zeroed(ptr nocapture noundef readonly %p) {
 ; CHECK-BE-LABEL: vec_promote_int_zeroed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lwz 3, 0(3)
-; CHECK-BE-NEXT:    li 4, 0
-; CHECK-BE-NEXT:    li 5, 0
-; CHECK-BE-NEXT:    rldimi 4, 4, 32, 0
-; CHECK-BE-NEXT:    rldimi 5, 3, 32, 0
-; CHECK-BE-NEXT:    mtfprd 1, 4
-; CHECK-BE-NEXT:    mtfprd 0, 5
-; CHECK-BE-NEXT:    xxmrghd 34, 0, 1
+; CHECK-BE-NEXT:    lxsiwzx 0, 0, 3
+; CHECK-BE-NEXT:    xxsldwi 34, 0, 0, 1
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: vec_promote_int_zeroed:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    lwz 3, 0(3)
-; CHECK-LE-NEXT:    li 4, 0
-; CHECK-LE-NEXT:    rldimi 3, 4, 32, 0
-; CHECK-LE-NEXT:    rldimi 4, 4, 32, 0
-; CHECK-LE-NEXT:    mtfprd 0, 3
-; CHECK-LE-NEXT:    mtfprd 1, 4
-; CHECK-LE-NEXT:    xxmrghd 34, 1, 0
+; CHECK-LE-NEXT:    lxsiwzx 0, 0, 3
+; CHECK-LE-NEXT:    xxsldwi 34, 0, 0, 2
 ; CHECK-LE-NEXT:    blr
 entry:
   %0 = load i32, ptr %p, align 4



More information about the llvm-commits mailing list