[llvm] [PowerPC] Optimize BUILD_VECTOR from load and zeros (PR #126599)

via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 10 13:24:26 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-powerpc

Author: Kamau Bridgeman (kamaub)

<details>
<summary>Changes</summary>

We are encountered with patterns like `BUILD_VECTOR 0, 0, (load), 0`
resulted in suboptimal codegen. This PR improves it.

Original Patch by: Kai Luo in #<!-- -->73609


---

Patch is 25.70 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/126599.diff


3 Files Affected:

- (modified) llvm/lib/Target/PowerPC/PPCInstrVSX.td (+136) 
- (modified) llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll (+52-182) 
- (modified) llvm/test/CodeGen/PowerPC/vec-promote.ll (+14-45) 


``````````diff
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 8e400bc63b7851a..783044ef02b56fc 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2431,6 +2431,47 @@ def DblwdCmp {
                                (v2i64 (XXSPLTW EQWSHAND, 2)), 0));
 }
 
+class SplatAndAssignIndexed<
+      SDPatternOperator op,
+      int Total, dag splat,
+      int Index, dag assign> {
+  defvar head = !listsplat(splat, Index);
+  defvar x = [assign];
+  defvar tail = !listsplat(splat, !sub(!sub(Total, Index), 1));
+  list<dag> Ops = !listconcat(head, x, tail);
+  dag DAG = !foldl((op), Ops, a, b, !con(a, (op b)));
+}
+
+class BVExtLoadAndZerosFP<int Index> : SplatAndAssignIndexed<
+    build_vector,
+    2, (f64 fpimm0),
+    Index, (f64 (extloadf32 ForceXForm:$src))>;
+
+class BVZExtLoadAndZerosInt<int Index> : SplatAndAssignIndexed<
+    build_vector,
+    2, (i64 0),
+    Index, (i64 (zextloadi32 ForceXForm:$src))>;
+
+class BVLoadAndZerosInt<int Index> : SplatAndAssignIndexed<
+    build_vector,
+    4, (i32 0),
+    Index, (i32 (load ForceXForm:$src))>;
+
+class BVLoadAndZerosFP<int Index> : SplatAndAssignIndexed<
+    build_vector,
+    4, (f32 fpimm0),
+    Index, (f32 (load ForceXForm:$src))>;
+
+class BVLoadAndZerosDbl<int Index> : SplatAndAssignIndexed<
+    build_vector,
+    2, (f64 fpimm0),
+    Index, (f64 (load ForceXForm:$src))>;
+
+class BVLoadAndZerosLong<int Index> : SplatAndAssignIndexed<
+    build_vector,
+    2, (i64 0),
+    Index, (i64 (load ForceXForm:$src))>;
+
 //---------------------------- Anonymous Patterns ----------------------------//
 // Predicate combinations are kept in roughly chronological order in terms of
 // instruction availability in the architecture. For example, VSX came in with
@@ -3449,6 +3490,53 @@ foreach Idx = [ [0,3], [2,1], [3,2] ] in {
             (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
                                    sub_64), ForceXForm:$src)>;
 }
+
+// BUILD_VECTOR via single load and zeros.
+// Extension load.
+def : Pat<(v2f64 BVExtLoadAndZerosFP<0>.DAG),
+          (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
+def : Pat<(v2f64 BVExtLoadAndZerosFP<1>.DAG),
+          (v2f64 (XXPERMDIs
+                 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC), 2))>;
+
+def : Pat<(v2i64 BVZExtLoadAndZerosInt<0>.DAG),
+          (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+def : Pat<(v2i64 BVZExtLoadAndZerosInt<1>.DAG),
+          (v2i64 (XXPERMDIs
+                 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC), 2))>;
+
+// Normal load.
+foreach Index = !range(4) in {
+  defvar Temp = !sub(5, Index);
+  defvar Offset = !if(!gt(Temp, 3), !sub(Temp, 4), Temp);
+  if !ne(Offset, 0) then {
+    def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
+              (v4i32 (XXSLDWIs
+                     (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
+                     Offset))>;
+    def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
+              (v4f32 (XXSLDWIs
+                     (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
+                     Offset))>;
+  } else {
+    def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
+              (v4i32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+    def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
+              (v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+  }
+}
+
+def : Pat<(v2f64 BVLoadAndZerosDbl<0>.DAG),
+          (v2f64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
+def : Pat<(v2f64 BVLoadAndZerosDbl<1>.DAG),
+          (v2f64 (XXPERMDIs
+                 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
+
+def : Pat<(v2i64 BVLoadAndZerosLong<0>.DAG),
+          (v2i64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
+def : Pat<(v2i64 BVLoadAndZerosLong<1>.DAG),
+          (v2i64 (XXPERMDIs
+                 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
 } // HasVSX, HasP8Vector, IsBigEndian, IsPPC64
 
 // Little endian Power8 VSX subtarget.
@@ -3542,6 +3630,54 @@ foreach Idx = [ [0,2], [1,1], [3,3] ] in {
             (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
                                    sub_64), ForceXForm:$src)>;
 }
+
+// BUILD_VECTOR via single load and zeros.
+// Extension load.
+def : Pat<(v2f64 BVExtLoadAndZerosFP<1>.DAG),
+          (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
+def : Pat<(v2f64 BVExtLoadAndZerosFP<0>.DAG),
+          (v2f64 (XXPERMDIs
+                 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC), 2))>;
+
+def : Pat<(v2i64 BVZExtLoadAndZerosInt<1>.DAG),
+          (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+def : Pat<(v2i64 BVZExtLoadAndZerosInt<0>.DAG),
+          (v2i64 (XXPERMDIs
+                 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC), 2))>;
+
+// Normal load.
+foreach Index = !range(4) in {
+  defvar Temp = !sub(!add(Index, 4), 2);
+  defvar Offset = !if(!gt(Temp, 3), !sub(Temp, 4), Temp);
+  if !ne(Offset, 0) then {
+    def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
+              (v4i32 (XXSLDWIs
+                     (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
+                     Offset))>;
+    def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
+              (v4f32 (XXSLDWIs
+                     (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
+                     Offset))>;
+  } else {
+    def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
+              (v4i32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+    def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
+              (v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+  }
+}
+
+def : Pat<(v2f64 BVLoadAndZerosDbl<1>.DAG),
+          (v2f64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
+def : Pat<(v2f64 BVLoadAndZerosDbl<0>.DAG),
+          (v2f64 (XXPERMDIs
+                 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
+
+def : Pat<(v2i64 BVLoadAndZerosLong<1>.DAG),
+          (v2i64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
+def : Pat<(v2i64 BVLoadAndZerosLong<0>.DAG),
+          (v2i64 (XXPERMDIs
+                 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
+
 } // HasVSX, HasP8Vector, IsLittleEndian
 
 // Big endian pre-Power9 VSX subtarget.
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
index 2259b6e0f44df64..fb67221e7d9f306 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
@@ -17,11 +17,7 @@ define  <2 x i64> @build_v2i64_extload_0(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v2i64_extload_0:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    lwz 3, 0(3)
-; PWR8-BE-NEXT:    li 4, 0
-; PWR8-BE-NEXT:    mtfprd 0, 4
-; PWR8-BE-NEXT:    mtfprd 1, 3
-; PWR8-BE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-BE-NEXT:    lxsiwzx 34, 0, 3
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v2i64_extload_0:
@@ -38,13 +34,8 @@ define  <2 x i64> @build_v2i64_extload_0(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v2i64_extload_0:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    lwz 3, 0(3)
-; PWR8-LE-NEXT:    li 4, 0
-; PWR8-LE-NEXT:    rldimi 3, 4, 32, 0
-; PWR8-LE-NEXT:    rldimi 4, 4, 32, 0
-; PWR8-LE-NEXT:    mtfprd 0, 3
-; PWR8-LE-NEXT:    mtfprd 1, 4
-; PWR8-LE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-LE-NEXT:    lxsiwzx 0, 0, 3
+; PWR8-LE-NEXT:    xxsldwi 34, 0, 0, 2
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load i32, ptr %p, align 4
@@ -66,11 +57,8 @@ define  <2 x i64> @build_v2i64_extload_1(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v2i64_extload_1:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    lwz 3, 0(3)
-; PWR8-BE-NEXT:    li 4, 0
-; PWR8-BE-NEXT:    mtfprd 0, 4
-; PWR8-BE-NEXT:    mtfprd 1, 3
-; PWR8-BE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-BE-NEXT:    lxsiwzx 0, 0, 3
+; PWR8-BE-NEXT:    xxswapd 34, 0
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v2i64_extload_1:
@@ -86,11 +74,7 @@ define  <2 x i64> @build_v2i64_extload_1(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v2i64_extload_1:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    lwz 3, 0(3)
-; PWR8-LE-NEXT:    li 4, 0
-; PWR8-LE-NEXT:    mtfprd 0, 4
-; PWR8-LE-NEXT:    mtfprd 1, 3
-; PWR8-LE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-LE-NEXT:    lxsiwzx 34, 0, 3
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load i32, ptr %p, align 4
@@ -109,9 +93,7 @@ define <2 x double> @build_v2f64_extload_0(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v2f64_extload_0:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    lfs 0, 0(3)
-; PWR8-BE-NEXT:    xxlxor 1, 1, 1
-; PWR8-BE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-BE-NEXT:    lxsspx 34, 0, 3
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v2f64_extload_0:
@@ -123,9 +105,8 @@ define <2 x double> @build_v2f64_extload_0(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v2f64_extload_0:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    lfs 0, 0(3)
-; PWR8-LE-NEXT:    xxlxor 1, 1, 1
-; PWR8-LE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-LE-NEXT:    lxsspx 0, 0, 3
+; PWR8-LE-NEXT:    xxswapd 34, 0
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load float, ptr %p, align 4
@@ -144,9 +125,8 @@ define <2 x double> @build_v2f64_extload_1(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v2f64_extload_1:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    lfs 0, 0(3)
-; PWR8-BE-NEXT:    xxlxor 1, 1, 1
-; PWR8-BE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-BE-NEXT:    lxsspx 0, 0, 3
+; PWR8-BE-NEXT:    xxswapd 34, 0
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v2f64_extload_1:
@@ -158,9 +138,7 @@ define <2 x double> @build_v2f64_extload_1(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v2f64_extload_1:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    lfs 0, 0(3)
-; PWR8-LE-NEXT:    xxlxor 1, 1, 1
-; PWR8-LE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-LE-NEXT:    lxsspx 34, 0, 3
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load float, ptr %p, align 4
@@ -179,9 +157,7 @@ define <2 x double> @build_v2f64_load_0(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v2f64_load_0:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    lfd 0, 0(3)
-; PWR8-BE-NEXT:    xxlxor 1, 1, 1
-; PWR8-BE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-BE-NEXT:    lxsdx 34, 0, 3
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v2f64_load_0:
@@ -193,9 +169,8 @@ define <2 x double> @build_v2f64_load_0(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v2f64_load_0:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    lfd 0, 0(3)
-; PWR8-LE-NEXT:    xxlxor 1, 1, 1
-; PWR8-LE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-LE-NEXT:    lxsdx 0, 0, 3
+; PWR8-LE-NEXT:    xxswapd 34, 0
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load double, ptr %p, align 8
@@ -213,9 +188,8 @@ define <2 x double> @build_v2f64_load_1(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v2f64_load_1:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    lfd 0, 0(3)
-; PWR8-BE-NEXT:    xxlxor 1, 1, 1
-; PWR8-BE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-BE-NEXT:    lxsdx 0, 0, 3
+; PWR8-BE-NEXT:    xxswapd 34, 0
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v2f64_load_1:
@@ -227,9 +201,7 @@ define <2 x double> @build_v2f64_load_1(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v2f64_load_1:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    lfd 0, 0(3)
-; PWR8-LE-NEXT:    xxlxor 1, 1, 1
-; PWR8-LE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-LE-NEXT:    lxsdx 34, 0, 3
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load double, ptr %p, align 8
@@ -250,11 +222,7 @@ define <2 x i64> @build_v2i64_load_0(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v2i64_load_0:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    ld 3, 0(3)
-; PWR8-BE-NEXT:    li 4, 0
-; PWR8-BE-NEXT:    mtfprd 0, 4
-; PWR8-BE-NEXT:    mtfprd 1, 3
-; PWR8-BE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-BE-NEXT:    lxsdx 34, 0, 3
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v2i64_load_0:
@@ -270,11 +238,8 @@ define <2 x i64> @build_v2i64_load_0(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v2i64_load_0:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    ld 3, 0(3)
-; PWR8-LE-NEXT:    li 4, 0
-; PWR8-LE-NEXT:    mtfprd 0, 4
-; PWR8-LE-NEXT:    mtfprd 1, 3
-; PWR8-LE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-LE-NEXT:    lxsdx 0, 0, 3
+; PWR8-LE-NEXT:    xxswapd 34, 0
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load i64, ptr %p, align 8
@@ -295,11 +260,8 @@ define <2 x i64> @build_v2i64_load_1(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v2i64_load_1:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    ld 3, 0(3)
-; PWR8-BE-NEXT:    li 4, 0
-; PWR8-BE-NEXT:    mtfprd 0, 4
-; PWR8-BE-NEXT:    mtfprd 1, 3
-; PWR8-BE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-BE-NEXT:    lxsdx 0, 0, 3
+; PWR8-BE-NEXT:    xxswapd 34, 0
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v2i64_load_1:
@@ -315,11 +277,7 @@ define <2 x i64> @build_v2i64_load_1(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v2i64_load_1:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    ld 3, 0(3)
-; PWR8-LE-NEXT:    li 4, 0
-; PWR8-LE-NEXT:    mtfprd 0, 4
-; PWR8-LE-NEXT:    mtfprd 1, 3
-; PWR8-LE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-LE-NEXT:    lxsdx 34, 0, 3
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load i64, ptr %p, align 8
@@ -341,14 +299,8 @@ define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v4i32_load_0:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    lwz 3, 0(3)
-; PWR8-BE-NEXT:    li 4, 0
-; PWR8-BE-NEXT:    li 5, 0
-; PWR8-BE-NEXT:    rldimi 4, 4, 32, 0
-; PWR8-BE-NEXT:    rldimi 5, 3, 32, 0
-; PWR8-BE-NEXT:    mtfprd 1, 4
-; PWR8-BE-NEXT:    mtfprd 0, 5
-; PWR8-BE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-BE-NEXT:    lxsiwzx 0, 0, 3
+; PWR8-BE-NEXT:    xxsldwi 34, 0, 0, 1
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v4i32_load_0:
@@ -365,13 +317,8 @@ define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v4i32_load_0:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    lwz 3, 0(3)
-; PWR8-LE-NEXT:    li 4, 0
-; PWR8-LE-NEXT:    rldimi 3, 4, 32, 0
-; PWR8-LE-NEXT:    rldimi 4, 4, 32, 0
-; PWR8-LE-NEXT:    mtfprd 0, 3
-; PWR8-LE-NEXT:    mtfprd 1, 4
-; PWR8-LE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-LE-NEXT:    lxsiwzx 0, 0, 3
+; PWR8-LE-NEXT:    xxsldwi 34, 0, 0, 2
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load i32, ptr %p, align 4
@@ -393,13 +340,7 @@ define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v4i32_load_1:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    lwz 3, 0(3)
-; PWR8-BE-NEXT:    li 4, 0
-; PWR8-BE-NEXT:    rldimi 3, 4, 32, 0
-; PWR8-BE-NEXT:    rldimi 4, 4, 32, 0
-; PWR8-BE-NEXT:    mtfprd 0, 3
-; PWR8-BE-NEXT:    mtfprd 1, 4
-; PWR8-BE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-BE-NEXT:    lxsiwzx 34, 0, 3
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v4i32_load_1:
@@ -416,14 +357,8 @@ define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v4i32_load_1:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    lwz 3, 0(3)
-; PWR8-LE-NEXT:    li 4, 0
-; PWR8-LE-NEXT:    li 5, 0
-; PWR8-LE-NEXT:    rldimi 4, 4, 32, 0
-; PWR8-LE-NEXT:    rldimi 5, 3, 32, 0
-; PWR8-LE-NEXT:    mtfprd 1, 4
-; PWR8-LE-NEXT:    mtfprd 0, 5
-; PWR8-LE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-LE-NEXT:    lxsiwzx 0, 0, 3
+; PWR8-LE-NEXT:    xxsldwi 34, 0, 0, 3
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load i32, ptr %p, align 4
@@ -445,14 +380,8 @@ define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v4i32_load_2:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    lwz 3, 0(3)
-; PWR8-BE-NEXT:    li 4, 0
-; PWR8-BE-NEXT:    li 5, 0
-; PWR8-BE-NEXT:    rldimi 4, 4, 32, 0
-; PWR8-BE-NEXT:    rldimi 5, 3, 32, 0
-; PWR8-BE-NEXT:    mtfprd 1, 4
-; PWR8-BE-NEXT:    mtfprd 0, 5
-; PWR8-BE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-BE-NEXT:    lxsiwzx 0, 0, 3
+; PWR8-BE-NEXT:    xxsldwi 34, 0, 0, 3
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v4i32_load_2:
@@ -469,13 +398,7 @@ define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v4i32_load_2:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    lwz 3, 0(3)
-; PWR8-LE-NEXT:    li 4, 0
-; PWR8-LE-NEXT:    rldimi 3, 4, 32, 0
-; PWR8-LE-NEXT:    rldimi 4, 4, 32, 0
-; PWR8-LE-NEXT:    mtfprd 0, 3
-; PWR8-LE-NEXT:    mtfprd 1, 4
-; PWR8-LE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-LE-NEXT:    lxsiwzx 34, 0, 3
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load i32, ptr %p, align 4
@@ -497,13 +420,8 @@ define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v4i32_load_3:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    lwz 3, 0(3)
-; PWR8-BE-NEXT:    li 4, 0
-; PWR8-BE-NEXT:    rldimi 3, 4, 32, 0
-; PWR8-BE-NEXT:    rldimi 4, 4, 32, 0
-; PWR8-BE-NEXT:    mtfprd 0, 3
-; PWR8-BE-NEXT:    mtfprd 1, 4
-; PWR8-BE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-BE-NEXT:    lxsiwzx 0, 0, 3
+; PWR8-BE-NEXT:    xxsldwi 34, 0, 0, 2
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v4i32_load_3:
@@ -520,14 +438,8 @@ define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v4i32_load_3:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    lwz 3, 0(3)
-; PWR8-LE-NEXT:    li 4, 0
-; PWR8-LE-NEXT:    li 5, 0
-; PWR8-LE-NEXT:    rldimi 4, 4, 32, 0
-; PWR8-LE-NEXT:    rldimi 5, 3, 32, 0
-; PWR8-LE-NEXT:    mtfprd 1, 4
-; PWR8-LE-NEXT:    mtfprd 0, 5
-; PWR8-LE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-LE-NEXT:    lxsiwzx 0, 0, 3
+; PWR8-LE-NEXT:    xxsldwi 34, 0, 0, 1
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load i32, ptr %p, align 4
@@ -554,13 +466,8 @@ define <4 x float> @build_v4f32_load_0(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v4f32_load_0:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    lfs 0, 0(3)
-; PWR8-BE-NEXT:    xxlxor 1, 1, 1
-; PWR8-BE-NEXT:    xxmrghd 0, 0, 1
-; PWR8-BE-NEXT:    xxspltd 1, 1, 0
-; PWR8-BE-NEXT:    xvcvdpsp 34, 0
-; PWR8-BE-NEXT:    xvcvdpsp 35, 1
-; PWR8-BE-NEXT:    vmrgew 2, 2, 3
+; PWR8-BE-NEXT:    lxsiwzx 0, 0, 3
+; PWR8-BE-NEXT:    xxsldwi 34, 0, 0, 1
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v4f32_load_0:
@@ -584,13 +491,8 @@ define <4 x float> @build_v4f32_load_0(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v4f32_load_0:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    lfs 0, 0(3)
-; PWR8-LE-NEXT:    xxlxor 1, 1, 1
-; PWR8-LE-NEXT:    xxmrghd 0, 1, 0
-; PWR8-LE-NEXT:    xxspltd 1, 1, 0
-; PWR8-LE-NEXT:    xvcvdpsp 34, 0
-; PWR8-LE-NEXT:    xvcvdpsp 35, 1
-; PWR8-LE-NEXT:    vmrgew 2, 3, 2
+; PWR8-LE-NEXT:    lxsiwzx 0, 0, 3
+; PWR8-LE-NEXT:    xxsldwi 34, 0, 0, 2
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load float, ptr %p, align 4
@@ -617,13 +519,7 @@ define <4 x float> @build_v4f32_load_1(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v4f32_load_1:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    lfs 0, 0(3)
-; PWR8-BE-NEXT:    xxlxor 1, 1, 1
-; PWR8-BE-NEXT:    xxmrghd 0, 0, 1
-; PWR8-BE-NEXT:    xxspltd 1, 1, 0
-; PWR8-BE-NEXT:    xvcvdpsp 34, 0
-; PWR8-BE-NEXT:    xvcvdpsp 35, 1
-; PWR8-BE-NEXT:    vmrgew 2, 3, 2
+; PWR8-BE-NEXT:    lxsiwzx 34, 0, 3
 ; PWR8-BE-NEXT:    blr
 ;
 ; PWR7-LE-LABEL: build_v4f32_load_1:
@@ -647,13 +543,8 @@ define <4 x float> @build_v4f32_load_1(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-LE-LABEL: build_v4f32_load_1:
 ; PWR8-LE:       # %bb.0: # %entry
-; PWR8-LE-NEXT:    lfs 0, 0(3)
-; PWR8-LE-NEXT:    xxlxor 1, 1, 1
-; PWR8-LE-NEXT:    xxmrghd 0, 1, 0
-; PWR8-LE-NEXT:    xxspltd 1, 1, 0
-; PWR8-LE-NEXT:    xvcvdpsp 34, 0
-; PWR8-LE-NEXT:    xvcvdpsp 35, 1
-; PWR8-LE-NEXT:    vmrgew 2, 2, 3
+; PWR8-LE-NEXT:    lxsiwzx 0, 0, 3
+; PWR8-LE-NEXT:    xxsldwi 34, 0, 0, 3
 ; PWR8-LE-NEXT:    blr
 entry:
   %0 = load float, ptr %p, align 4
@@ -680,13 +571,8 @@ define <4 x float> @build_v4f32_load_2(ptr nocapture noundef readonly %p) {
 ;
 ; PWR8-BE-LABEL: build_v4f32_load_2:
 ; PWR8-BE:       # %bb.0: # %entry
-; PWR8-BE-NEXT:    lfs 0, 0(3)
-; PW...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/126599


More information about the llvm-commits mailing list