[llvm] [PowerPC] Optimize BUILD_VECTOR from load and zeros (PR #73609)
Kai Luo via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 29 21:55:34 PST 2023
https://github.com/bzEq updated https://github.com/llvm/llvm-project/pull/73609
>From fe5c6b5cb324774e79863ac898d1acecbe2310b4 Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Tue, 28 Nov 2023 03:18:08 +0000
Subject: [PATCH] Optimize BUILD_VECTOR
---
llvm/lib/Target/PowerPC/PPCInstrVSX.td | 136 ++++++++++
.../build-vector-from-load-and-zeros.ll | 234 ++++--------------
llvm/test/CodeGen/PowerPC/vec-promote.ll | 59 ++---
3 files changed, 202 insertions(+), 227 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 0e5f6b773bb5441..c479bca70fef2fd 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2431,6 +2431,47 @@ def DblwdCmp {
(v2i64 (XXSPLTW EQWSHAND, 2)), 0));
}
+class SplatAndAssignIndexed< // Builds (op ...) with Total operands: `assign` at position Index, `splat` everywhere else.
+ SDPatternOperator op,
+ int Total, dag splat,
+ int Index, dag assign> {
+ defvar head = !listsplat(splat, Index); // Index copies of splat placed before the assigned slot.
+ defvar x = [assign];
+ defvar tail = !listsplat(splat, !sub(!sub(Total, Index), 1)); // Total - Index - 1 copies of splat placed after it.
+ list<dag> Ops = !listconcat(head, x, tail);
+ dag DAG = !foldl((op), Ops, a, b, !con(a, (op b))); // Start from the bare (op) and append each list element as one more operand.
+}
+
+class BVExtLoadAndZerosFP<int Index> : SplatAndAssignIndexed< // 2-operand build_vector: f64 zero except at operand Index.
+ build_vector,
+ 2, (f64 fpimm0),
+ Index, (f64 (extloadf32 ForceXForm:$src))>; // Operand Index is an f32 load from $src extended to f64.
+
+class BVZExtLoadAndZerosInt<int Index> : SplatAndAssignIndexed< // 2-operand build_vector: i64 zero except at operand Index.
+ build_vector,
+ 2, (i64 0),
+ Index, (i64 (zextloadi32 ForceXForm:$src))>; // Operand Index is an i32 load from $src zero-extended to i64.
+
+class BVLoadAndZerosInt<int Index> : SplatAndAssignIndexed< // 4-operand build_vector: i32 zero except at operand Index.
+ build_vector,
+ 4, (i32 0),
+ Index, (i32 (load ForceXForm:$src))>; // Operand Index is a plain i32 load from $src.
+
+class BVLoadAndZerosFP<int Index> : SplatAndAssignIndexed< // 4-operand build_vector: f32 zero except at operand Index.
+ build_vector,
+ 4, (f32 fpimm0),
+ Index, (f32 (load ForceXForm:$src))>; // Operand Index is a plain f32 load from $src.
+
+class BVLoadAndZerosDbl<int Index> : SplatAndAssignIndexed< // 2-operand build_vector: f64 zero except at operand Index.
+ build_vector,
+ 2, (f64 fpimm0),
+ Index, (f64 (load ForceXForm:$src))>; // Operand Index is a plain f64 load from $src.
+
+class BVLoadAndZerosLong<int Index> : SplatAndAssignIndexed< // 2-operand build_vector: i64 zero except at operand Index.
+ build_vector,
+ 2, (i64 0),
+ Index, (i64 (load ForceXForm:$src))>; // Operand Index is a plain i64 load from $src.
+
//---------------------------- Anonymous Patterns ----------------------------//
// Predicate combinations are kept in roughly chronological order in terms of
// instruction availability in the architecture. For example, VSX came in with
@@ -3449,6 +3490,53 @@ foreach Idx = [ [0,3], [2,1], [3,2] ] in {
(STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
sub_64), ForceXForm:$src)>;
}
+
+// BUILD_VECTOR via single load and zeros. NOTE(review): the scalar load result is used as the whole vector, so these patterns appear to rely on the load leaving every other lane zero -- confirm this holds on Power8.
+// Extension load: an f32->f64 or zero-extended i32->i64 load placed in a two-element vector.
+def : Pat<(v2f64 BVExtLoadAndZerosFP<0>.DAG), // Loaded value at operand 0: the LXSSPX result is used directly.
+ (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
+def : Pat<(v2f64 BVExtLoadAndZerosFP<1>.DAG), // Loaded value at operand 1: XXPERMDIs with immediate 2 is applied on top.
+ (v2f64 (XXPERMDIs
+ (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC), 2))>;
+
+def : Pat<(v2i64 BVZExtLoadAndZerosInt<0>.DAG), // Integer counterpart using LXSIWZX; operand 0 needs no permute.
+ (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+def : Pat<(v2i64 BVZExtLoadAndZerosInt<1>.DAG), // Operand 1: same load followed by XXPERMDIs with immediate 2.
+ (v2i64 (XXPERMDIs
+ (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC), 2))>;
+
+// Normal load: one 32-bit element loaded with LXSIWZX (used for both i32 and f32), the other three operands zero.
+foreach Index = !range(4) in {
+ defvar Temp = !sub(5, Index);
+ defvar Offset = !if(!gt(Temp, 3), !sub(Temp, 4), Temp); // (5 - Index) wrapped into 0..3: Index 0,1,2,3 -> Offset 1,0,3,2.
+ if !ne(Offset, 0) then { // Non-zero Offset: apply XXSLDWIs with immediate Offset to the loaded value.
+ def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
+ (v4i32 (XXSLDWIs
+ (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
+ Offset))>;
+ def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
+ (v4f32 (XXSLDWIs
+ (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
+ Offset))>;
+ } else { // Offset 0 (Index 1 here): the load result is used directly.
+ def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
+ (v4i32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+ def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
+ (v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+ }
+}
+
+def : Pat<(v2f64 BVLoadAndZerosDbl<0>.DAG), // 64-bit element loaded with LXSDX; at operand 0 the result is used directly.
+ (v2f64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
+def : Pat<(v2f64 BVLoadAndZerosDbl<1>.DAG), // At operand 1: same load followed by XXPERMDIs with immediate 2.
+ (v2f64 (XXPERMDIs
+ (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
+
+def : Pat<(v2i64 BVLoadAndZerosLong<0>.DAG), // i64 variant of the two patterns above; same instruction selection.
+ (v2i64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
+def : Pat<(v2i64 BVLoadAndZerosLong<1>.DAG),
+ (v2i64 (XXPERMDIs
+ (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
} // HasVSX, HasP8Vector, IsBigEndian, IsPPC64
// Little endian Power8 VSX subtarget.
@@ -3542,6 +3630,54 @@ foreach Idx = [ [0,2], [1,1], [3,3] ] in {
(STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
sub_64), ForceXForm:$src)>;
}
+
+// BUILD_VECTOR via single load and zeros (little-endian variants: here operand 1 is the case selected without a permute).
+// Extension load: an f32->f64 or zero-extended i32->i64 load placed in a two-element vector.
+def : Pat<(v2f64 BVExtLoadAndZerosFP<1>.DAG), // Loaded value at operand 1: the LXSSPX result is used directly.
+ (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
+def : Pat<(v2f64 BVExtLoadAndZerosFP<0>.DAG), // Loaded value at operand 0: XXPERMDIs with immediate 2 is applied on top.
+ (v2f64 (XXPERMDIs
+ (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC), 2))>;
+
+def : Pat<(v2i64 BVZExtLoadAndZerosInt<1>.DAG), // Integer counterpart using LXSIWZX; operand 1 needs no permute.
+ (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+def : Pat<(v2i64 BVZExtLoadAndZerosInt<0>.DAG), // Operand 0: same load followed by XXPERMDIs with immediate 2.
+ (v2i64 (XXPERMDIs
+ (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC), 2))>;
+
+// Normal load: one 32-bit element loaded with LXSIWZX (used for both i32 and f32), the other three operands zero.
+foreach Index = !range(4) in {
+ defvar Temp = !sub(!add(Index, 4), 2);
+ defvar Offset = !if(!gt(Temp, 3), !sub(Temp, 4), Temp); // (Index + 2) wrapped into 0..3: Index 0,1,2,3 -> Offset 2,3,0,1.
+ if !ne(Offset, 0) then { // Non-zero Offset: apply XXSLDWIs with immediate Offset to the loaded value.
+ def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
+ (v4i32 (XXSLDWIs
+ (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
+ Offset))>;
+ def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
+ (v4f32 (XXSLDWIs
+ (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
+ Offset))>;
+ } else { // Offset 0 (Index 2 here): the load result is used directly.
+ def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
+ (v4i32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+ def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
+ (v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+ }
+}
+
+def : Pat<(v2f64 BVLoadAndZerosDbl<1>.DAG), // 64-bit element loaded with LXSDX; at operand 1 the result is used directly.
+ (v2f64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
+def : Pat<(v2f64 BVLoadAndZerosDbl<0>.DAG), // At operand 0: same load followed by XXPERMDIs with immediate 2.
+ (v2f64 (XXPERMDIs
+ (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
+
+def : Pat<(v2i64 BVLoadAndZerosLong<1>.DAG), // i64 variant of the two patterns above; same instruction selection.
+ (v2i64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
+def : Pat<(v2i64 BVLoadAndZerosLong<0>.DAG),
+ (v2i64 (XXPERMDIs
+ (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
+
} // HasVSX, HasP8Vector, IsLittleEndian
// Big endian pre-Power9 VSX subtarget.
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
index cc32a76b22c2872..a2f65ad75972e69 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
@@ -17,11 +17,7 @@ define <2 x i64> @build_v2i64_extload_0(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v2i64_extload_0:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: lwz 3, 0(3)
-; PWR8-BE-NEXT: li 4, 0
-; PWR8-BE-NEXT: mtfprd 0, 4
-; PWR8-BE-NEXT: mtfprd 1, 3
-; PWR8-BE-NEXT: xxmrghd 34, 1, 0
+; PWR8-BE-NEXT: lxsiwzx 34, 0, 3
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v2i64_extload_0:
@@ -45,13 +41,8 @@ define <2 x i64> @build_v2i64_extload_0(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v2i64_extload_0:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: lwz 3, 0(3)
-; PWR8-LE-NEXT: li 4, 0
-; PWR8-LE-NEXT: rldimi 3, 4, 32, 0
-; PWR8-LE-NEXT: rldimi 4, 4, 32, 0
-; PWR8-LE-NEXT: mtfprd 0, 3
-; PWR8-LE-NEXT: mtfprd 1, 4
-; PWR8-LE-NEXT: xxmrghd 34, 1, 0
+; PWR8-LE-NEXT: lxsiwzx 0, 0, 3
+; PWR8-LE-NEXT: xxsldwi 34, 0, 0, 2
; PWR8-LE-NEXT: blr
entry:
%0 = load i32, ptr %p, align 4
@@ -73,11 +64,8 @@ define <2 x i64> @build_v2i64_extload_1(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v2i64_extload_1:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: lwz 3, 0(3)
-; PWR8-BE-NEXT: li 4, 0
-; PWR8-BE-NEXT: mtfprd 0, 4
-; PWR8-BE-NEXT: mtfprd 1, 3
-; PWR8-BE-NEXT: xxmrghd 34, 0, 1
+; PWR8-BE-NEXT: lxsiwzx 0, 0, 3
+; PWR8-BE-NEXT: xxswapd 34, 0
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v2i64_extload_1:
@@ -93,11 +81,7 @@ define <2 x i64> @build_v2i64_extload_1(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v2i64_extload_1:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: lwz 3, 0(3)
-; PWR8-LE-NEXT: li 4, 0
-; PWR8-LE-NEXT: mtfprd 0, 4
-; PWR8-LE-NEXT: mtfprd 1, 3
-; PWR8-LE-NEXT: xxmrghd 34, 1, 0
+; PWR8-LE-NEXT: lxsiwzx 34, 0, 3
; PWR8-LE-NEXT: blr
entry:
%0 = load i32, ptr %p, align 4
@@ -116,9 +100,7 @@ define <2 x double> @build_v2f64_extload_0(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v2f64_extload_0:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: lfs 0, 0(3)
-; PWR8-BE-NEXT: xxlxor 1, 1, 1
-; PWR8-BE-NEXT: xxmrghd 34, 0, 1
+; PWR8-BE-NEXT: lxsspx 34, 0, 3
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v2f64_extload_0:
@@ -130,9 +112,8 @@ define <2 x double> @build_v2f64_extload_0(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v2f64_extload_0:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: lfs 0, 0(3)
-; PWR8-LE-NEXT: xxlxor 1, 1, 1
-; PWR8-LE-NEXT: xxmrghd 34, 1, 0
+; PWR8-LE-NEXT: lxsspx 0, 0, 3
+; PWR8-LE-NEXT: xxswapd 34, 0
; PWR8-LE-NEXT: blr
entry:
%0 = load float, ptr %p, align 4
@@ -151,9 +132,8 @@ define <2 x double> @build_v2f64_extload_1(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v2f64_extload_1:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: lfs 0, 0(3)
-; PWR8-BE-NEXT: xxlxor 1, 1, 1
-; PWR8-BE-NEXT: xxmrghd 34, 1, 0
+; PWR8-BE-NEXT: lxsspx 0, 0, 3
+; PWR8-BE-NEXT: xxswapd 34, 0
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v2f64_extload_1:
@@ -165,9 +145,7 @@ define <2 x double> @build_v2f64_extload_1(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v2f64_extload_1:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: lfs 0, 0(3)
-; PWR8-LE-NEXT: xxlxor 1, 1, 1
-; PWR8-LE-NEXT: xxmrghd 34, 0, 1
+; PWR8-LE-NEXT: lxsspx 34, 0, 3
; PWR8-LE-NEXT: blr
entry:
%0 = load float, ptr %p, align 4
@@ -186,9 +164,7 @@ define <2 x double> @build_v2f64_load_0(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v2f64_load_0:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: lfd 0, 0(3)
-; PWR8-BE-NEXT: xxlxor 1, 1, 1
-; PWR8-BE-NEXT: xxmrghd 34, 0, 1
+; PWR8-BE-NEXT: lxsdx 34, 0, 3
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v2f64_load_0:
@@ -200,9 +176,8 @@ define <2 x double> @build_v2f64_load_0(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v2f64_load_0:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: lfd 0, 0(3)
-; PWR8-LE-NEXT: xxlxor 1, 1, 1
-; PWR8-LE-NEXT: xxmrghd 34, 1, 0
+; PWR8-LE-NEXT: lxsdx 0, 0, 3
+; PWR8-LE-NEXT: xxswapd 34, 0
; PWR8-LE-NEXT: blr
entry:
%0 = load double, ptr %p, align 8
@@ -220,9 +195,8 @@ define <2 x double> @build_v2f64_load_1(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v2f64_load_1:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: lfd 0, 0(3)
-; PWR8-BE-NEXT: xxlxor 1, 1, 1
-; PWR8-BE-NEXT: xxmrghd 34, 1, 0
+; PWR8-BE-NEXT: lxsdx 0, 0, 3
+; PWR8-BE-NEXT: xxswapd 34, 0
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v2f64_load_1:
@@ -234,9 +208,7 @@ define <2 x double> @build_v2f64_load_1(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v2f64_load_1:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: lfd 0, 0(3)
-; PWR8-LE-NEXT: xxlxor 1, 1, 1
-; PWR8-LE-NEXT: xxmrghd 34, 0, 1
+; PWR8-LE-NEXT: lxsdx 34, 0, 3
; PWR8-LE-NEXT: blr
entry:
%0 = load double, ptr %p, align 8
@@ -257,11 +229,7 @@ define <2 x i64> @build_v2i64_load_0(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v2i64_load_0:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: ld 3, 0(3)
-; PWR8-BE-NEXT: li 4, 0
-; PWR8-BE-NEXT: mtfprd 0, 4
-; PWR8-BE-NEXT: mtfprd 1, 3
-; PWR8-BE-NEXT: xxmrghd 34, 1, 0
+; PWR8-BE-NEXT: lxsdx 34, 0, 3
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v2i64_load_0:
@@ -277,11 +245,8 @@ define <2 x i64> @build_v2i64_load_0(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v2i64_load_0:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: ld 3, 0(3)
-; PWR8-LE-NEXT: li 4, 0
-; PWR8-LE-NEXT: mtfprd 0, 4
-; PWR8-LE-NEXT: mtfprd 1, 3
-; PWR8-LE-NEXT: xxmrghd 34, 0, 1
+; PWR8-LE-NEXT: lxsdx 0, 0, 3
+; PWR8-LE-NEXT: xxswapd 34, 0
; PWR8-LE-NEXT: blr
entry:
%0 = load i64, ptr %p, align 8
@@ -302,11 +267,8 @@ define <2 x i64> @build_v2i64_load_1(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v2i64_load_1:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: ld 3, 0(3)
-; PWR8-BE-NEXT: li 4, 0
-; PWR8-BE-NEXT: mtfprd 0, 4
-; PWR8-BE-NEXT: mtfprd 1, 3
-; PWR8-BE-NEXT: xxmrghd 34, 0, 1
+; PWR8-BE-NEXT: lxsdx 0, 0, 3
+; PWR8-BE-NEXT: xxswapd 34, 0
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v2i64_load_1:
@@ -322,11 +284,7 @@ define <2 x i64> @build_v2i64_load_1(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v2i64_load_1:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: ld 3, 0(3)
-; PWR8-LE-NEXT: li 4, 0
-; PWR8-LE-NEXT: mtfprd 0, 4
-; PWR8-LE-NEXT: mtfprd 1, 3
-; PWR8-LE-NEXT: xxmrghd 34, 1, 0
+; PWR8-LE-NEXT: lxsdx 34, 0, 3
; PWR8-LE-NEXT: blr
entry:
%0 = load i64, ptr %p, align 8
@@ -353,14 +311,8 @@ define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v4i32_load_0:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: lwz 3, 0(3)
-; PWR8-BE-NEXT: li 4, 0
-; PWR8-BE-NEXT: li 5, 0
-; PWR8-BE-NEXT: rldimi 4, 4, 32, 0
-; PWR8-BE-NEXT: rldimi 5, 3, 32, 0
-; PWR8-BE-NEXT: mtfprd 1, 4
-; PWR8-BE-NEXT: mtfprd 0, 5
-; PWR8-BE-NEXT: xxmrghd 34, 0, 1
+; PWR8-BE-NEXT: lxsiwzx 0, 0, 3
+; PWR8-BE-NEXT: xxsldwi 34, 0, 0, 1
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v4i32_load_0:
@@ -384,13 +336,8 @@ define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v4i32_load_0:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: lwz 3, 0(3)
-; PWR8-LE-NEXT: li 4, 0
-; PWR8-LE-NEXT: rldimi 3, 4, 32, 0
-; PWR8-LE-NEXT: rldimi 4, 4, 32, 0
-; PWR8-LE-NEXT: mtfprd 0, 3
-; PWR8-LE-NEXT: mtfprd 1, 4
-; PWR8-LE-NEXT: xxmrghd 34, 1, 0
+; PWR8-LE-NEXT: lxsiwzx 0, 0, 3
+; PWR8-LE-NEXT: xxsldwi 34, 0, 0, 2
; PWR8-LE-NEXT: blr
entry:
%0 = load i32, ptr %p, align 4
@@ -417,13 +364,7 @@ define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v4i32_load_1:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: lwz 3, 0(3)
-; PWR8-BE-NEXT: li 4, 0
-; PWR8-BE-NEXT: rldimi 3, 4, 32, 0
-; PWR8-BE-NEXT: rldimi 4, 4, 32, 0
-; PWR8-BE-NEXT: mtfprd 0, 3
-; PWR8-BE-NEXT: mtfprd 1, 4
-; PWR8-BE-NEXT: xxmrghd 34, 0, 1
+; PWR8-BE-NEXT: lxsiwzx 34, 0, 3
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v4i32_load_1:
@@ -447,14 +388,8 @@ define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v4i32_load_1:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: lwz 3, 0(3)
-; PWR8-LE-NEXT: li 4, 0
-; PWR8-LE-NEXT: li 5, 0
-; PWR8-LE-NEXT: rldimi 4, 4, 32, 0
-; PWR8-LE-NEXT: rldimi 5, 3, 32, 0
-; PWR8-LE-NEXT: mtfprd 1, 4
-; PWR8-LE-NEXT: mtfprd 0, 5
-; PWR8-LE-NEXT: xxmrghd 34, 1, 0
+; PWR8-LE-NEXT: lxsiwzx 0, 0, 3
+; PWR8-LE-NEXT: xxsldwi 34, 0, 0, 3
; PWR8-LE-NEXT: blr
entry:
%0 = load i32, ptr %p, align 4
@@ -481,14 +416,8 @@ define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v4i32_load_2:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: lwz 3, 0(3)
-; PWR8-BE-NEXT: li 4, 0
-; PWR8-BE-NEXT: li 5, 0
-; PWR8-BE-NEXT: rldimi 4, 4, 32, 0
-; PWR8-BE-NEXT: rldimi 5, 3, 32, 0
-; PWR8-BE-NEXT: mtfprd 1, 4
-; PWR8-BE-NEXT: mtfprd 0, 5
-; PWR8-BE-NEXT: xxmrghd 34, 1, 0
+; PWR8-BE-NEXT: lxsiwzx 0, 0, 3
+; PWR8-BE-NEXT: xxsldwi 34, 0, 0, 3
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v4i32_load_2:
@@ -512,13 +441,7 @@ define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v4i32_load_2:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: lwz 3, 0(3)
-; PWR8-LE-NEXT: li 4, 0
-; PWR8-LE-NEXT: rldimi 3, 4, 32, 0
-; PWR8-LE-NEXT: rldimi 4, 4, 32, 0
-; PWR8-LE-NEXT: mtfprd 0, 3
-; PWR8-LE-NEXT: mtfprd 1, 4
-; PWR8-LE-NEXT: xxmrghd 34, 0, 1
+; PWR8-LE-NEXT: lxsiwzx 34, 0, 3
; PWR8-LE-NEXT: blr
entry:
%0 = load i32, ptr %p, align 4
@@ -545,13 +468,8 @@ define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v4i32_load_3:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: lwz 3, 0(3)
-; PWR8-BE-NEXT: li 4, 0
-; PWR8-BE-NEXT: rldimi 3, 4, 32, 0
-; PWR8-BE-NEXT: rldimi 4, 4, 32, 0
-; PWR8-BE-NEXT: mtfprd 0, 3
-; PWR8-BE-NEXT: mtfprd 1, 4
-; PWR8-BE-NEXT: xxmrghd 34, 1, 0
+; PWR8-BE-NEXT: lxsiwzx 0, 0, 3
+; PWR8-BE-NEXT: xxsldwi 34, 0, 0, 2
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v4i32_load_3:
@@ -575,14 +493,8 @@ define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v4i32_load_3:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: lwz 3, 0(3)
-; PWR8-LE-NEXT: li 4, 0
-; PWR8-LE-NEXT: li 5, 0
-; PWR8-LE-NEXT: rldimi 4, 4, 32, 0
-; PWR8-LE-NEXT: rldimi 5, 3, 32, 0
-; PWR8-LE-NEXT: mtfprd 1, 4
-; PWR8-LE-NEXT: mtfprd 0, 5
-; PWR8-LE-NEXT: xxmrghd 34, 0, 1
+; PWR8-LE-NEXT: lxsiwzx 0, 0, 3
+; PWR8-LE-NEXT: xxsldwi 34, 0, 0, 1
; PWR8-LE-NEXT: blr
entry:
%0 = load i32, ptr %p, align 4
@@ -609,13 +521,8 @@ define <4 x float> @build_v4f32_load_0(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v4f32_load_0:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: lfs 0, 0(3)
-; PWR8-BE-NEXT: xxlxor 1, 1, 1
-; PWR8-BE-NEXT: xxmrghd 0, 0, 1
-; PWR8-BE-NEXT: xxspltd 1, 1, 0
-; PWR8-BE-NEXT: xvcvdpsp 34, 0
-; PWR8-BE-NEXT: xvcvdpsp 35, 1
-; PWR8-BE-NEXT: vmrgew 2, 2, 3
+; PWR8-BE-NEXT: lxsiwzx 0, 0, 3
+; PWR8-BE-NEXT: xxsldwi 34, 0, 0, 1
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v4f32_load_0:
@@ -639,13 +546,8 @@ define <4 x float> @build_v4f32_load_0(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v4f32_load_0:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: lfs 0, 0(3)
-; PWR8-LE-NEXT: xxlxor 1, 1, 1
-; PWR8-LE-NEXT: xxmrghd 0, 1, 0
-; PWR8-LE-NEXT: xxspltd 1, 1, 0
-; PWR8-LE-NEXT: xvcvdpsp 34, 0
-; PWR8-LE-NEXT: xvcvdpsp 35, 1
-; PWR8-LE-NEXT: vmrgew 2, 3, 2
+; PWR8-LE-NEXT: lxsiwzx 0, 0, 3
+; PWR8-LE-NEXT: xxsldwi 34, 0, 0, 2
; PWR8-LE-NEXT: blr
entry:
%0 = load float, ptr %p, align 4
@@ -672,13 +574,7 @@ define <4 x float> @build_v4f32_load_1(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v4f32_load_1:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: lfs 0, 0(3)
-; PWR8-BE-NEXT: xxlxor 1, 1, 1
-; PWR8-BE-NEXT: xxmrghd 0, 0, 1
-; PWR8-BE-NEXT: xxspltd 1, 1, 0
-; PWR8-BE-NEXT: xvcvdpsp 34, 0
-; PWR8-BE-NEXT: xvcvdpsp 35, 1
-; PWR8-BE-NEXT: vmrgew 2, 3, 2
+; PWR8-BE-NEXT: lxsiwzx 34, 0, 3
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v4f32_load_1:
@@ -702,13 +598,8 @@ define <4 x float> @build_v4f32_load_1(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v4f32_load_1:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: lfs 0, 0(3)
-; PWR8-LE-NEXT: xxlxor 1, 1, 1
-; PWR8-LE-NEXT: xxmrghd 0, 1, 0
-; PWR8-LE-NEXT: xxspltd 1, 1, 0
-; PWR8-LE-NEXT: xvcvdpsp 34, 0
-; PWR8-LE-NEXT: xvcvdpsp 35, 1
-; PWR8-LE-NEXT: vmrgew 2, 2, 3
+; PWR8-LE-NEXT: lxsiwzx 0, 0, 3
+; PWR8-LE-NEXT: xxsldwi 34, 0, 0, 3
; PWR8-LE-NEXT: blr
entry:
%0 = load float, ptr %p, align 4
@@ -735,13 +626,8 @@ define <4 x float> @build_v4f32_load_2(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v4f32_load_2:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: lfs 0, 0(3)
-; PWR8-BE-NEXT: xxlxor 1, 1, 1
-; PWR8-BE-NEXT: xxmrghd 0, 1, 0
-; PWR8-BE-NEXT: xxspltd 1, 1, 0
-; PWR8-BE-NEXT: xvcvdpsp 34, 0
-; PWR8-BE-NEXT: xvcvdpsp 35, 1
-; PWR8-BE-NEXT: vmrgew 2, 2, 3
+; PWR8-BE-NEXT: lxsiwzx 0, 0, 3
+; PWR8-BE-NEXT: xxsldwi 34, 0, 0, 3
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v4f32_load_2:
@@ -765,13 +651,7 @@ define <4 x float> @build_v4f32_load_2(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v4f32_load_2:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: lfs 0, 0(3)
-; PWR8-LE-NEXT: xxlxor 1, 1, 1
-; PWR8-LE-NEXT: xxmrghd 0, 0, 1
-; PWR8-LE-NEXT: xxspltd 1, 1, 0
-; PWR8-LE-NEXT: xvcvdpsp 34, 0
-; PWR8-LE-NEXT: xvcvdpsp 35, 1
-; PWR8-LE-NEXT: vmrgew 2, 3, 2
+; PWR8-LE-NEXT: lxsiwzx 34, 0, 3
; PWR8-LE-NEXT: blr
entry:
%0 = load float, ptr %p, align 4
@@ -798,13 +678,8 @@ define <4 x float> @build_v4f32_load_3(ptr nocapture noundef readonly %p) {
;
; PWR8-BE-LABEL: build_v4f32_load_3:
; PWR8-BE: # %bb.0: # %entry
-; PWR8-BE-NEXT: lfs 0, 0(3)
-; PWR8-BE-NEXT: xxlxor 1, 1, 1
-; PWR8-BE-NEXT: xxmrghd 0, 1, 0
-; PWR8-BE-NEXT: xxspltd 1, 1, 0
-; PWR8-BE-NEXT: xvcvdpsp 34, 0
-; PWR8-BE-NEXT: xvcvdpsp 35, 1
-; PWR8-BE-NEXT: vmrgew 2, 3, 2
+; PWR8-BE-NEXT: lxsiwzx 0, 0, 3
+; PWR8-BE-NEXT: xxsldwi 34, 0, 0, 2
; PWR8-BE-NEXT: blr
;
; PWR7-LE-LABEL: build_v4f32_load_3:
@@ -828,13 +703,8 @@ define <4 x float> @build_v4f32_load_3(ptr nocapture noundef readonly %p) {
;
; PWR8-LE-LABEL: build_v4f32_load_3:
; PWR8-LE: # %bb.0: # %entry
-; PWR8-LE-NEXT: lfs 0, 0(3)
-; PWR8-LE-NEXT: xxlxor 1, 1, 1
-; PWR8-LE-NEXT: xxmrghd 0, 0, 1
-; PWR8-LE-NEXT: xxspltd 1, 1, 0
-; PWR8-LE-NEXT: xvcvdpsp 34, 0
-; PWR8-LE-NEXT: xvcvdpsp 35, 1
-; PWR8-LE-NEXT: vmrgew 2, 2, 3
+; PWR8-LE-NEXT: lxsiwzx 0, 0, 3
+; PWR8-LE-NEXT: xxsldwi 34, 0, 0, 1
; PWR8-LE-NEXT: blr
entry:
%0 = load float, ptr %p, align 4
diff --git a/llvm/test/CodeGen/PowerPC/vec-promote.ll b/llvm/test/CodeGen/PowerPC/vec-promote.ll
index 628c5101c079652..1715532f07792ff 100644
--- a/llvm/test/CodeGen/PowerPC/vec-promote.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-promote.ll
@@ -7,16 +7,13 @@
define noundef <2 x double> @vec_promote_double_zeroed(ptr nocapture noundef readonly %p) {
; CHECK-BE-LABEL: vec_promote_double_zeroed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lfd 0, 0(3)
-; CHECK-BE-NEXT: xxlxor 1, 1, 1
-; CHECK-BE-NEXT: xxmrghd 34, 0, 1
+; CHECK-BE-NEXT: lxsdx 34, 0, 3
; CHECK-BE-NEXT: blr
;
; CHECK-LE-LABEL: vec_promote_double_zeroed:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: lfd 0, 0(3)
-; CHECK-LE-NEXT: xxlxor 1, 1, 1
-; CHECK-LE-NEXT: xxmrghd 34, 1, 0
+; CHECK-LE-NEXT: lxsdx 0, 0, 3
+; CHECK-LE-NEXT: xxswapd 34, 0
; CHECK-LE-NEXT: blr
entry:
%0 = load double, ptr %p, align 8
@@ -43,24 +40,14 @@ entry:
define noundef <4 x float> @vec_promote_float_zeroed(ptr nocapture noundef readonly %p) {
; CHECK-BE-LABEL: vec_promote_float_zeroed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lfs 0, 0(3)
-; CHECK-BE-NEXT: xxlxor 1, 1, 1
-; CHECK-BE-NEXT: xxmrghd 0, 0, 1
-; CHECK-BE-NEXT: xxspltd 1, 1, 0
-; CHECK-BE-NEXT: xvcvdpsp 34, 0
-; CHECK-BE-NEXT: xvcvdpsp 35, 1
-; CHECK-BE-NEXT: vmrgew 2, 2, 3
+; CHECK-BE-NEXT: lxsiwzx 0, 0, 3
+; CHECK-BE-NEXT: xxsldwi 34, 0, 0, 1
; CHECK-BE-NEXT: blr
;
; CHECK-LE-LABEL: vec_promote_float_zeroed:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: lfs 0, 0(3)
-; CHECK-LE-NEXT: xxlxor 1, 1, 1
-; CHECK-LE-NEXT: xxmrghd 0, 1, 0
-; CHECK-LE-NEXT: xxspltd 1, 1, 0
-; CHECK-LE-NEXT: xvcvdpsp 34, 0
-; CHECK-LE-NEXT: xvcvdpsp 35, 1
-; CHECK-LE-NEXT: vmrgew 2, 3, 2
+; CHECK-LE-NEXT: lxsiwzx 0, 0, 3
+; CHECK-LE-NEXT: xxsldwi 34, 0, 0, 2
; CHECK-LE-NEXT: blr
entry:
%0 = load float, ptr %p, align 8
@@ -89,20 +76,13 @@ entry:
define noundef <2 x i64> @vec_promote_long_long_zeroed(ptr nocapture noundef readonly %p) {
; CHECK-BE-LABEL: vec_promote_long_long_zeroed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: ld 3, 0(3)
-; CHECK-BE-NEXT: li 4, 0
-; CHECK-BE-NEXT: mtfprd 0, 4
-; CHECK-BE-NEXT: mtfprd 1, 3
-; CHECK-BE-NEXT: xxmrghd 34, 1, 0
+; CHECK-BE-NEXT: lxsdx 34, 0, 3
; CHECK-BE-NEXT: blr
;
; CHECK-LE-LABEL: vec_promote_long_long_zeroed:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: ld 3, 0(3)
-; CHECK-LE-NEXT: li 4, 0
-; CHECK-LE-NEXT: mtfprd 0, 4
-; CHECK-LE-NEXT: mtfprd 1, 3
-; CHECK-LE-NEXT: xxmrghd 34, 0, 1
+; CHECK-LE-NEXT: lxsdx 0, 0, 3
+; CHECK-LE-NEXT: xxswapd 34, 0
; CHECK-LE-NEXT: blr
entry:
%0 = load i64, ptr %p, align 8
@@ -129,25 +109,14 @@ entry:
define noundef <4 x i32> @vec_promote_int_zeroed(ptr nocapture noundef readonly %p) {
; CHECK-BE-LABEL: vec_promote_int_zeroed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lwz 3, 0(3)
-; CHECK-BE-NEXT: li 4, 0
-; CHECK-BE-NEXT: li 5, 0
-; CHECK-BE-NEXT: rldimi 4, 4, 32, 0
-; CHECK-BE-NEXT: rldimi 5, 3, 32, 0
-; CHECK-BE-NEXT: mtfprd 1, 4
-; CHECK-BE-NEXT: mtfprd 0, 5
-; CHECK-BE-NEXT: xxmrghd 34, 0, 1
+; CHECK-BE-NEXT: lxsiwzx 0, 0, 3
+; CHECK-BE-NEXT: xxsldwi 34, 0, 0, 1
; CHECK-BE-NEXT: blr
;
; CHECK-LE-LABEL: vec_promote_int_zeroed:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: lwz 3, 0(3)
-; CHECK-LE-NEXT: li 4, 0
-; CHECK-LE-NEXT: rldimi 3, 4, 32, 0
-; CHECK-LE-NEXT: rldimi 4, 4, 32, 0
-; CHECK-LE-NEXT: mtfprd 0, 3
-; CHECK-LE-NEXT: mtfprd 1, 4
-; CHECK-LE-NEXT: xxmrghd 34, 1, 0
+; CHECK-LE-NEXT: lxsiwzx 0, 0, 3
+; CHECK-LE-NEXT: xxsldwi 34, 0, 0, 2
; CHECK-LE-NEXT: blr
entry:
%0 = load i32, ptr %p, align 4
More information about the llvm-commits
mailing list