[llvm] [PowerPC] Optimize BUILD_VECTOR from load and zeros (PR #126599)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 10 13:24:26 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-powerpc
Author: Kamau Bridgeman (kamaub)
<details>
<summary>Changes</summary>
We encountered patterns like `BUILD_VECTOR 0, 0, (load), 0`
that resulted in suboptimal codegen. This PR improves it.
Original Patch by: Kai Luo in #<!-- -->73609
---
Patch is 25.70 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/126599.diff
3 Files Affected:
- (modified) llvm/lib/Target/PowerPC/PPCInstrVSX.td (+136)
- (modified) llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll (+52-182)
- (modified) llvm/test/CodeGen/PowerPC/vec-promote.ll (+14-45)
``````````diff
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 8e400bc63b7851a..783044ef02b56fc 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2431,6 +2431,47 @@ def DblwdCmp {
(v2i64 (XXSPLTW EQWSHAND, 2)), 0));
}
+class SplatAndAssignIndexed<
+ SDPatternOperator op,
+ int Total, dag splat,
+ int Index, dag assign> {
+ defvar head = !listsplat(splat, Index);
+ defvar x = [assign];
+ defvar tail = !listsplat(splat, !sub(!sub(Total, Index), 1));
+ list<dag> Ops = !listconcat(head, x, tail);
+ dag DAG = !foldl((op), Ops, a, b, !con(a, (op b)));
+}
+
+class BVExtLoadAndZerosFP<int Index> : SplatAndAssignIndexed<
+ build_vector,
+ 2, (f64 fpimm0),
+ Index, (f64 (extloadf32 ForceXForm:$src))>;
+
+class BVZExtLoadAndZerosInt<int Index> : SplatAndAssignIndexed<
+ build_vector,
+ 2, (i64 0),
+ Index, (i64 (zextloadi32 ForceXForm:$src))>;
+
+class BVLoadAndZerosInt<int Index> : SplatAndAssignIndexed<
+ build_vector,
+ 4, (i32 0),
+ Index, (i32 (load ForceXForm:$src))>;
+
+class BVLoadAndZerosFP<int Index> : SplatAndAssignIndexed<
+ build_vector,
+ 4, (f32 fpimm0),
+ Index, (f32 (load ForceXForm:$src))>;
+
+class BVLoadAndZerosDbl<int Index> : SplatAndAssignIndexed<
+ build_vector,
+ 2, (f64 fpimm0),
+ Index, (f64 (load ForceXForm:$src))>;
+
+class BVLoadAndZerosLong<int Index> : SplatAndAssignIndexed<
+ build_vector,
+ 2, (i64 0),
+ Index, (i64 (load ForceXForm:$src))>;
+
//---------------------------- Anonymous Patterns ----------------------------//
// Predicate combinations are kept in roughly chronological order in terms of
// instruction availability in the architecture. For example, VSX came in with
@@ -3449,6 +3490,53 @@ foreach Idx = [ [0,3], [2,1], [3,2] ] in {
(STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
sub_64), ForceXForm:$src)>;
}
+
+// BUILD_VECTOR via single load and zeros.
+// Extension load.
+def : Pat<(v2f64 BVExtLoadAndZerosFP<0>.DAG),
+ (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
+def : Pat<(v2f64 BVExtLoadAndZerosFP<1>.DAG),
+ (v2f64 (XXPERMDIs
+ (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC), 2))>;
+
+def : Pat<(v2i64 BVZExtLoadAndZerosInt<0>.DAG),
+ (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+def : Pat<(v2i64 BVZExtLoadAndZerosInt<1>.DAG),
+ (v2i64 (XXPERMDIs
+ (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC), 2))>;
+
+// Normal load.
+foreach Index = !range(4) in {
+ defvar Temp = !sub(5, Index);
+ defvar Offset = !if(!gt(Temp, 3), !sub(Temp, 4), Temp);
+ if !ne(Offset, 0) then {
+ def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
+ (v4i32 (XXSLDWIs
+ (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
+ Offset))>;
+ def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
+ (v4f32 (XXSLDWIs
+ (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
+ Offset))>;
+ } else {
+ def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
+ (v4i32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+ def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
+ (v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+ }
+}
+
+def : Pat<(v2f64 BVLoadAndZerosDbl<0>.DAG),
+ (v2f64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
+def : Pat<(v2f64 BVLoadAndZerosDbl<1>.DAG),
+ (v2f64 (XXPERMDIs
+ (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
+
+def : Pat<(v2i64 BVLoadAndZerosLong<0>.DAG),
+ (v2i64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
+def : Pat<(v2i64 BVLoadAndZerosLong<1>.DAG),
+ (v2i64 (XXPERMDIs
+ (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
} // HasVSX, HasP8Vector, IsBigEndian, IsPPC64
// Little endian Power8 VSX subtarget.
@@ -3542,6 +3630,54 @@ foreach Idx = [ [0,2], [1,1], [3,3] ] in {
(STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
sub_64), ForceXForm:$src)>;
}
+
+// BUILD_VECTOR via single load and zeros.
+// Extension load.
+def : Pat<(v2f64 BVExtLoadAndZerosFP<1>.DAG),
+ (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
+def : Pat<(v2f64 BVExtLoadAndZerosFP<0>.DAG),
+ (v2f64 (XXPERMDIs
+ (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC), 2))>;
+
+def : Pat<(v2i64 BVZExtLoadAndZerosInt<1>.DAG),
+ (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+def : Pat<(v2i64 BVZExtLoadAndZerosInt<0>.DAG),
+ (v2i64 (XXPERMDIs
+ (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC), 2))>;
+
+// Normal load.
+foreach Index = !range(4) in {
+ defvar Temp = !sub(!add(Index, 4), 2);
+ defvar Offset = !if(!gt(Temp, 3), !sub(Temp, 4), Temp);
+ if !ne(Offset, 0) then {
+ def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
+ (v4i32 (XXSLDWIs
+ (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
+ Offset))>;
+ def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
+ (v4f32 (XXSLDWIs
+ (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
+ Offset))>;
+ } else {
+ def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
+ (v4i32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+ def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
+ (v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+ }
+}
+
+def : Pat<(v2f64 BVLoadAndZerosDbl<1>.DAG),
+ (v2f64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
+def : Pat<(v2f64 BVLoadAndZerosDbl<0>.DAG),
+ (v2f64 (XXPERMDIs
+ (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
+
+def : Pat<(v2i64 BVLoadAndZerosLong<1>.DAG),
+ (v2i64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
+def : Pat<(v2i64 BVLoadAndZerosLong<0>.DAG),
+ (v2i64 (XXPERMDIs
+ (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
+
} // HasVSX, HasP8Vector, IsLittleEndian
// Big endian pre-Power9 VSX subtarget.
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
index 2259b6e0f44df64..fb67221e7d9f306 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
@@ -17,11 +17,7 @@ define <2 x i64> @build_v2i64_extload_0(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v2i64_extload_0:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: lwz 3, 0(3)
-; PWR8-BE-NEXT: li 4, 0
-; PWR8-BE-NEXT: mtfprd 0, 4
-; PWR8-BE-NEXT: mtfprd 1, 3
-; PWR8-BE-NEXT: xxmrghd 34, 1, 0
+; PWR8-BE-NEXT: lxsiwzx 34, 0, 3
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v2i64_extload_0:
@@ -38,13 +34,8 @@ define <2 x i64> @build_v2i64_extload_0(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v2i64_extload_0:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: lwz 3, 0(3)
-; PWR8-LE-NEXT: li 4, 0
-; PWR8-LE-NEXT: rldimi 3, 4, 32, 0
-; PWR8-LE-NEXT: rldimi 4, 4, 32, 0
-; PWR8-LE-NEXT: mtfprd 0, 3
-; PWR8-LE-NEXT: mtfprd 1, 4
-; PWR8-LE-NEXT: xxmrghd 34, 1, 0
+; PWR8-LE-NEXT: lxsiwzx 0, 0, 3
+; PWR8-LE-NEXT: xxsldwi 34, 0, 0, 2
; PWR8-LE-NEXT: blr
entry:
%0 = load i32, ptr %p, align 4
@@ -66,11 +57,8 @@ define <2 x i64> @build_v2i64_extload_1(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v2i64_extload_1:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: lwz 3, 0(3)
-; PWR8-BE-NEXT: li 4, 0
-; PWR8-BE-NEXT: mtfprd 0, 4
-; PWR8-BE-NEXT: mtfprd 1, 3
-; PWR8-BE-NEXT: xxmrghd 34, 0, 1
+; PWR8-BE-NEXT: lxsiwzx 0, 0, 3
+; PWR8-BE-NEXT: xxswapd 34, 0
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v2i64_extload_1:
@@ -86,11 +74,7 @@ define <2 x i64> @build_v2i64_extload_1(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v2i64_extload_1:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: lwz 3, 0(3)
-; PWR8-LE-NEXT: li 4, 0
-; PWR8-LE-NEXT: mtfprd 0, 4
-; PWR8-LE-NEXT: mtfprd 1, 3
-; PWR8-LE-NEXT: xxmrghd 34, 1, 0
+; PWR8-LE-NEXT: lxsiwzx 34, 0, 3
; PWR8-LE-NEXT: blr
entry:
%0 = load i32, ptr %p, align 4
@@ -109,9 +93,7 @@ define <2 x double> @build_v2f64_extload_0(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v2f64_extload_0:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: lfs 0, 0(3)
-; PWR8-BE-NEXT: xxlxor 1, 1, 1
-; PWR8-BE-NEXT: xxmrghd 34, 0, 1
+; PWR8-BE-NEXT: lxsspx 34, 0, 3
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v2f64_extload_0:
@@ -123,9 +105,8 @@ define <2 x double> @build_v2f64_extload_0(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v2f64_extload_0:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: lfs 0, 0(3)
-; PWR8-LE-NEXT: xxlxor 1, 1, 1
-; PWR8-LE-NEXT: xxmrghd 34, 1, 0
+; PWR8-LE-NEXT: lxsspx 0, 0, 3
+; PWR8-LE-NEXT: xxswapd 34, 0
; PWR8-LE-NEXT: blr
entry:
%0 = load float, ptr %p, align 4
@@ -144,9 +125,8 @@ define <2 x double> @build_v2f64_extload_1(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v2f64_extload_1:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: lfs 0, 0(3)
-; PWR8-BE-NEXT: xxlxor 1, 1, 1
-; PWR8-BE-NEXT: xxmrghd 34, 1, 0
+; PWR8-BE-NEXT: lxsspx 0, 0, 3
+; PWR8-BE-NEXT: xxswapd 34, 0
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v2f64_extload_1:
@@ -158,9 +138,7 @@ define <2 x double> @build_v2f64_extload_1(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v2f64_extload_1:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: lfs 0, 0(3)
-; PWR8-LE-NEXT: xxlxor 1, 1, 1
-; PWR8-LE-NEXT: xxmrghd 34, 0, 1
+; PWR8-LE-NEXT: lxsspx 34, 0, 3
; PWR8-LE-NEXT: blr
entry:
%0 = load float, ptr %p, align 4
@@ -179,9 +157,7 @@ define <2 x double> @build_v2f64_load_0(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v2f64_load_0:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: lfd 0, 0(3)
-; PWR8-BE-NEXT: xxlxor 1, 1, 1
-; PWR8-BE-NEXT: xxmrghd 34, 0, 1
+; PWR8-BE-NEXT: lxsdx 34, 0, 3
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v2f64_load_0:
@@ -193,9 +169,8 @@ define <2 x double> @build_v2f64_load_0(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v2f64_load_0:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: lfd 0, 0(3)
-; PWR8-LE-NEXT: xxlxor 1, 1, 1
-; PWR8-LE-NEXT: xxmrghd 34, 1, 0
+; PWR8-LE-NEXT: lxsdx 0, 0, 3
+; PWR8-LE-NEXT: xxswapd 34, 0
; PWR8-LE-NEXT: blr
entry:
%0 = load double, ptr %p, align 8
@@ -213,9 +188,8 @@ define <2 x double> @build_v2f64_load_1(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v2f64_load_1:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: lfd 0, 0(3)
-; PWR8-BE-NEXT: xxlxor 1, 1, 1
-; PWR8-BE-NEXT: xxmrghd 34, 1, 0
+; PWR8-BE-NEXT: lxsdx 0, 0, 3
+; PWR8-BE-NEXT: xxswapd 34, 0
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v2f64_load_1:
@@ -227,9 +201,7 @@ define <2 x double> @build_v2f64_load_1(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v2f64_load_1:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: lfd 0, 0(3)
-; PWR8-LE-NEXT: xxlxor 1, 1, 1
-; PWR8-LE-NEXT: xxmrghd 34, 0, 1
+; PWR8-LE-NEXT: lxsdx 34, 0, 3
; PWR8-LE-NEXT: blr
entry:
%0 = load double, ptr %p, align 8
@@ -250,11 +222,7 @@ define <2 x i64> @build_v2i64_load_0(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v2i64_load_0:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: ld 3, 0(3)
-; PWR8-BE-NEXT: li 4, 0
-; PWR8-BE-NEXT: mtfprd 0, 4
-; PWR8-BE-NEXT: mtfprd 1, 3
-; PWR8-BE-NEXT: xxmrghd 34, 1, 0
+; PWR8-BE-NEXT: lxsdx 34, 0, 3
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v2i64_load_0:
@@ -270,11 +238,8 @@ define <2 x i64> @build_v2i64_load_0(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v2i64_load_0:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: ld 3, 0(3)
-; PWR8-LE-NEXT: li 4, 0
-; PWR8-LE-NEXT: mtfprd 0, 4
-; PWR8-LE-NEXT: mtfprd 1, 3
-; PWR8-LE-NEXT: xxmrghd 34, 0, 1
+; PWR8-LE-NEXT: lxsdx 0, 0, 3
+; PWR8-LE-NEXT: xxswapd 34, 0
; PWR8-LE-NEXT: blr
entry:
%0 = load i64, ptr %p, align 8
@@ -295,11 +260,8 @@ define <2 x i64> @build_v2i64_load_1(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v2i64_load_1:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: ld 3, 0(3)
-; PWR8-BE-NEXT: li 4, 0
-; PWR8-BE-NEXT: mtfprd 0, 4
-; PWR8-BE-NEXT: mtfprd 1, 3
-; PWR8-BE-NEXT: xxmrghd 34, 0, 1
+; PWR8-BE-NEXT: lxsdx 0, 0, 3
+; PWR8-BE-NEXT: xxswapd 34, 0
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v2i64_load_1:
@@ -315,11 +277,7 @@ define <2 x i64> @build_v2i64_load_1(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v2i64_load_1:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: ld 3, 0(3)
-; PWR8-LE-NEXT: li 4, 0
-; PWR8-LE-NEXT: mtfprd 0, 4
-; PWR8-LE-NEXT: mtfprd 1, 3
-; PWR8-LE-NEXT: xxmrghd 34, 1, 0
+; PWR8-LE-NEXT: lxsdx 34, 0, 3
; PWR8-LE-NEXT: blr
entry:
%0 = load i64, ptr %p, align 8
@@ -341,14 +299,8 @@ define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v4i32_load_0:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: lwz 3, 0(3)
-; PWR8-BE-NEXT: li 4, 0
-; PWR8-BE-NEXT: li 5, 0
-; PWR8-BE-NEXT: rldimi 4, 4, 32, 0
-; PWR8-BE-NEXT: rldimi 5, 3, 32, 0
-; PWR8-BE-NEXT: mtfprd 1, 4
-; PWR8-BE-NEXT: mtfprd 0, 5
-; PWR8-BE-NEXT: xxmrghd 34, 0, 1
+; PWR8-BE-NEXT: lxsiwzx 0, 0, 3
+; PWR8-BE-NEXT: xxsldwi 34, 0, 0, 1
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v4i32_load_0:
@@ -365,13 +317,8 @@ define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v4i32_load_0:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: lwz 3, 0(3)
-; PWR8-LE-NEXT: li 4, 0
-; PWR8-LE-NEXT: rldimi 3, 4, 32, 0
-; PWR8-LE-NEXT: rldimi 4, 4, 32, 0
-; PWR8-LE-NEXT: mtfprd 0, 3
-; PWR8-LE-NEXT: mtfprd 1, 4
-; PWR8-LE-NEXT: xxmrghd 34, 1, 0
+; PWR8-LE-NEXT: lxsiwzx 0, 0, 3
+; PWR8-LE-NEXT: xxsldwi 34, 0, 0, 2
; PWR8-LE-NEXT: blr
entry:
%0 = load i32, ptr %p, align 4
@@ -393,13 +340,7 @@ define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v4i32_load_1:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: lwz 3, 0(3)
-; PWR8-BE-NEXT: li 4, 0
-; PWR8-BE-NEXT: rldimi 3, 4, 32, 0
-; PWR8-BE-NEXT: rldimi 4, 4, 32, 0
-; PWR8-BE-NEXT: mtfprd 0, 3
-; PWR8-BE-NEXT: mtfprd 1, 4
-; PWR8-BE-NEXT: xxmrghd 34, 0, 1
+; PWR8-BE-NEXT: lxsiwzx 34, 0, 3
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v4i32_load_1:
@@ -416,14 +357,8 @@ define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v4i32_load_1:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: lwz 3, 0(3)
-; PWR8-LE-NEXT: li 4, 0
-; PWR8-LE-NEXT: li 5, 0
-; PWR8-LE-NEXT: rldimi 4, 4, 32, 0
-; PWR8-LE-NEXT: rldimi 5, 3, 32, 0
-; PWR8-LE-NEXT: mtfprd 1, 4
-; PWR8-LE-NEXT: mtfprd 0, 5
-; PWR8-LE-NEXT: xxmrghd 34, 1, 0
+; PWR8-LE-NEXT: lxsiwzx 0, 0, 3
+; PWR8-LE-NEXT: xxsldwi 34, 0, 0, 3
; PWR8-LE-NEXT: blr
entry:
%0 = load i32, ptr %p, align 4
@@ -445,14 +380,8 @@ define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v4i32_load_2:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: lwz 3, 0(3)
-; PWR8-BE-NEXT: li 4, 0
-; PWR8-BE-NEXT: li 5, 0
-; PWR8-BE-NEXT: rldimi 4, 4, 32, 0
-; PWR8-BE-NEXT: rldimi 5, 3, 32, 0
-; PWR8-BE-NEXT: mtfprd 1, 4
-; PWR8-BE-NEXT: mtfprd 0, 5
-; PWR8-BE-NEXT: xxmrghd 34, 1, 0
+; PWR8-BE-NEXT: lxsiwzx 0, 0, 3
+; PWR8-BE-NEXT: xxsldwi 34, 0, 0, 3
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v4i32_load_2:
@@ -469,13 +398,7 @@ define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v4i32_load_2:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: lwz 3, 0(3)
-; PWR8-LE-NEXT: li 4, 0
-; PWR8-LE-NEXT: rldimi 3, 4, 32, 0
-; PWR8-LE-NEXT: rldimi 4, 4, 32, 0
-; PWR8-LE-NEXT: mtfprd 0, 3
-; PWR8-LE-NEXT: mtfprd 1, 4
-; PWR8-LE-NEXT: xxmrghd 34, 0, 1
+; PWR8-LE-NEXT: lxsiwzx 34, 0, 3
; PWR8-LE-NEXT: blr
entry:
%0 = load i32, ptr %p, align 4
@@ -497,13 +420,8 @@ define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v4i32_load_3:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: lwz 3, 0(3)
-; PWR8-BE-NEXT: li 4, 0
-; PWR8-BE-NEXT: rldimi 3, 4, 32, 0
-; PWR8-BE-NEXT: rldimi 4, 4, 32, 0
-; PWR8-BE-NEXT: mtfprd 0, 3
-; PWR8-BE-NEXT: mtfprd 1, 4
-; PWR8-BE-NEXT: xxmrghd 34, 1, 0
+; PWR8-BE-NEXT: lxsiwzx 0, 0, 3
+; PWR8-BE-NEXT: xxsldwi 34, 0, 0, 2
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v4i32_load_3:
@@ -520,14 +438,8 @@ define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v4i32_load_3:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: lwz 3, 0(3)
-; PWR8-LE-NEXT: li 4, 0
-; PWR8-LE-NEXT: li 5, 0
-; PWR8-LE-NEXT: rldimi 4, 4, 32, 0
-; PWR8-LE-NEXT: rldimi 5, 3, 32, 0
-; PWR8-LE-NEXT: mtfprd 1, 4
-; PWR8-LE-NEXT: mtfprd 0, 5
-; PWR8-LE-NEXT: xxmrghd 34, 0, 1
+; PWR8-LE-NEXT: lxsiwzx 0, 0, 3
+; PWR8-LE-NEXT: xxsldwi 34, 0, 0, 1
; PWR8-LE-NEXT: blr
entry:
%0 = load i32, ptr %p, align 4
@@ -554,13 +466,8 @@ define <4 x float> @build_v4f32_load_0(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v4f32_load_0:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: lfs 0, 0(3)
-; PWR8-BE-NEXT: xxlxor 1, 1, 1
-; PWR8-BE-NEXT: xxmrghd 0, 0, 1
-; PWR8-BE-NEXT: xxspltd 1, 1, 0
-; PWR8-BE-NEXT: xvcvdpsp 34, 0
-; PWR8-BE-NEXT: xvcvdpsp 35, 1
-; PWR8-BE-NEXT: vmrgew 2, 2, 3
+; PWR8-BE-NEXT: lxsiwzx 0, 0, 3
+; PWR8-BE-NEXT: xxsldwi 34, 0, 0, 1
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v4f32_load_0:
@@ -584,13 +491,8 @@ define <4 x float> @build_v4f32_load_0(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v4f32_load_0:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: lfs 0, 0(3)
-; PWR8-LE-NEXT: xxlxor 1, 1, 1
-; PWR8-LE-NEXT: xxmrghd 0, 1, 0
-; PWR8-LE-NEXT: xxspltd 1, 1, 0
-; PWR8-LE-NEXT: xvcvdpsp 34, 0
-; PWR8-LE-NEXT: xvcvdpsp 35, 1
-; PWR8-LE-NEXT: vmrgew 2, 3, 2
+; PWR8-LE-NEXT: lxsiwzx 0, 0, 3
+; PWR8-LE-NEXT: xxsldwi 34, 0, 0, 2
; PWR8-LE-NEXT: blr
entry:
%0 = load float, ptr %p, align 4
@@ -617,13 +519,7 @@ define <4 x float> @build_v4f32_load_1(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v4f32_load_1:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: lfs 0, 0(3)
-; PWR8-BE-NEXT: xxlxor 1, 1, 1
-; PWR8-BE-NEXT: xxmrghd 0, 0, 1
-; PWR8-BE-NEXT: xxspltd 1, 1, 0
-; PWR8-BE-NEXT: xvcvdpsp 34, 0
-; PWR8-BE-NEXT: xvcvdpsp 35, 1
-; PWR8-BE-NEXT: vmrgew 2, 3, 2
+; PWR8-BE-NEXT: lxsiwzx 34, 0, 3
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v4f32_load_1:
@@ -647,13 +543,8 @@ define <4 x float> @build_v4f32_load_1(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v4f32_load_1:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: lfs 0, 0(3)
-; PWR8-LE-NEXT: xxlxor 1, 1, 1
-; PWR8-LE-NEXT: xxmrghd 0, 1, 0
-; PWR8-LE-NEXT: xxspltd 1, 1, 0
-; PWR8-LE-NEXT: xvcvdpsp 34, 0
-; PWR8-LE-NEXT: xvcvdpsp 35, 1
-; PWR8-LE-NEXT: vmrgew 2, 2, 3
+; PWR8-LE-NEXT: lxsiwzx 0, 0, 3
+; PWR8-LE-NEXT: xxsldwi 34, 0, 0, 3
; PWR8-LE-NEXT: blr
entry:
%0 = load float, ptr %p, align 4
@@ -680,13 +571,8 @@ define <4 x float> @build_v4f32_load_2(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v4f32_load_2:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: lfs 0, 0(3)
-; PW...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/126599
More information about the llvm-commits
mailing list