[llvm] [PowerPC] Optimize BUILD_VECTOR from load and zeros (PR #73609)
Kai Luo via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 29 01:16:59 PST 2023
https://github.com/bzEq updated https://github.com/llvm/llvm-project/pull/73609
>From 587f8e153f13c4f44baa876a0f48f89056c29432 Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Tue, 28 Nov 2023 03:18:08 +0000
Subject: [PATCH 01/12] Up
---
llvm/lib/Target/PowerPC/PPCInstrVSX.td | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 0e5f6b773bb5441..cc0009ff69e7d37 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -3403,6 +3403,12 @@ def : Pat<(f32 (vector_extract v4f32:$S, i32:$Idx)),
def : Pat<(f64 (vector_extract v2f64:$S, i32:$Idx)),
(f64 VectorExtractions.BE_32B_VARIABLE_DOUBLE)>;
+def : Pat<(v2f64 (build_vector (f64 (extloadf32 ForceXForm:$src)), (f64 fpimm0))),
+ (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
+def : Pat<(v2i64 (build_vector (i64 (zextloadi32 ForceXForm:$src)), (i64 0))),
+ (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+
+
defm : ScalToVecWPermute<
v4i32, (i32 (load ForceXForm:$src)),
(XXSLDWIs (LIWZX ForceXForm:$src), 1),
>From ea7eb55adfb1be9f76909b7e076c13930bc97e69 Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Tue, 28 Nov 2023 03:18:46 +0000
Subject: [PATCH 02/12] Format
---
llvm/lib/Target/PowerPC/PPCInstrVSX.td | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index cc0009ff69e7d37..e6a7f240b265597 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -3405,6 +3405,7 @@ def : Pat<(f64 (vector_extract v2f64:$S, i32:$Idx)),
def : Pat<(v2f64 (build_vector (f64 (extloadf32 ForceXForm:$src)), (f64 fpimm0))),
(v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
+
def : Pat<(v2i64 (build_vector (i64 (zextloadi32 ForceXForm:$src)), (i64 0))),
(v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
>From 8ac488546ba9cfcb7db34530f7f4158ddb08051c Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Tue, 28 Nov 2023 03:29:22 +0000
Subject: [PATCH 03/12] Update test
---
.../PowerPC/build-vector-from-load-and-zeros.ll | 10 ++--------
1 file changed, 2 insertions(+), 8 deletions(-)
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
index 16c2617b3564931..cf4c6c61840c72b 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
@@ -15,11 +15,7 @@ define <2 x i64> @build_v2i64(ptr nocapture noundef readonly %p, <2 x i64> noun
;
; PWR8-LABEL: build_v2i64:
; PWR8: # %bb.0: # %entry
-; PWR8-NEXT: lwz 3, 0(3)
-; PWR8-NEXT: li 4, 0
-; PWR8-NEXT: mtfprd 0, 4
-; PWR8-NEXT: mtfprd 1, 3
-; PWR8-NEXT: xxmrghd 34, 1, 0
+; PWR8-NEXT: lxsiwzx 34, 0, 3
; PWR8-NEXT: blr
entry:
%0 = load i32, ptr %p, align 4
@@ -38,9 +34,7 @@ define <2 x double> @build_v2f64(ptr nocapture noundef readonly %p, <2 x double>
;
; PWR8-LABEL: build_v2f64:
; PWR8: # %bb.0: # %entry
-; PWR8-NEXT: lfs 0, 0(3)
-; PWR8-NEXT: xxlxor 1, 1, 1
-; PWR8-NEXT: xxmrghd 34, 0, 1
+; PWR8-NEXT: lxsspx 34, 0, 3
; PWR8-NEXT: blr
entry:
%0 = load float, ptr %p, align 4
>From 894e99867c719d75a4eb4e7fd58b37a3d13446a3 Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Tue, 28 Nov 2023 05:12:54 +0000
Subject: [PATCH 04/12] Minor
---
llvm/lib/Target/PowerPC/PPCInstrVSX.td | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index e6a7f240b265597..c1079966ff3a2e6 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -3403,13 +3403,6 @@ def : Pat<(f32 (vector_extract v4f32:$S, i32:$Idx)),
def : Pat<(f64 (vector_extract v2f64:$S, i32:$Idx)),
(f64 VectorExtractions.BE_32B_VARIABLE_DOUBLE)>;
-def : Pat<(v2f64 (build_vector (f64 (extloadf32 ForceXForm:$src)), (f64 fpimm0))),
- (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
-
-def : Pat<(v2i64 (build_vector (i64 (zextloadi32 ForceXForm:$src)), (i64 0))),
- (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
-
-
defm : ScalToVecWPermute<
v4i32, (i32 (load ForceXForm:$src)),
(XXSLDWIs (LIWZX ForceXForm:$src), 1),
@@ -3444,6 +3437,12 @@ def : Pat<(store (i32 (extractelt v4i32:$A, 1)), ForceXForm:$src),
def : Pat<(store (f32 (extractelt v4f32:$A, 1)), ForceXForm:$src),
(STIWX (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>;
+// BUILD_VECTOR via loads and zeros.
+def : Pat<(v2f64 (build_vector (f64 (extloadf32 ForceXForm:$src)), (f64 fpimm0))),
+ (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
+def : Pat<(v2i64 (build_vector (i64 (zextloadi32 ForceXForm:$src)), (i64 0))),
+ (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+
// Elements in a register on a BE system are in order <0, 1, 2, 3>.
// The store instructions store the second word from the left.
// So to align element zero, we need to modulo-left-shift by 3 words.
>From 2361068029688d69981057d7673a0ed8c69e7eae Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Wed, 29 Nov 2023 07:37:33 +0000
Subject: [PATCH 05/12] Use general DAG
---
llvm/lib/Target/PowerPC/PPCInstrVSX.td | 25 ++++++++++++++++++++++++-
1 file changed, 24 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index c1079966ff3a2e6..76534ded71d603a 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2431,6 +2431,24 @@ def DblwdCmp {
(v2i64 (XXSPLTW EQWSHAND, 2)), 0));
}
+class SplatAndAssignIndexed<
+ SDPatternOperator op,
+ dag splat,
+ int Total,
+ int Index, dag assign> {
+ defvar head = !listsplat(splat, Index);
+ defvar x = [assign];
+ defvar tail = !listsplat(splat, !sub(!sub(Total, Index), 1));
+ defvar Ops = !listconcat(head, x, tail);
+ dag DAG = !foldl((op), Ops, a, b, !con(a, (op b)));
+}
+
+def BVLoadAndZeroFP : SplatAndAssignIndexed<
+ build_vector,
+ (f64 fpimm0),
+ 2, 0,
+ (f64 (extloadf32 ForceXForm:$src))>;
+
//---------------------------- Anonymous Patterns ----------------------------//
// Predicate combinations are kept in roughly chronological order in terms of
// instruction availability in the architecture. For example, VSX came in with
@@ -3438,10 +3456,15 @@ def : Pat<(store (f32 (extractelt v4f32:$A, 1)), ForceXForm:$src),
(STIWX (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>;
// BUILD_VECTOR via loads and zeros.
-def : Pat<(v2f64 (build_vector (f64 (extloadf32 ForceXForm:$src)), (f64 fpimm0))),
+//def : Pat<(v2f64 (build_vector (f64 (extloadf32 ForceXForm:$src)), (f64 fpimm0))),
+// (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
+def : Pat<(v2f64 BVLoadAndZeroFP.DAG),
(v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
def : Pat<(v2i64 (build_vector (i64 (zextloadi32 ForceXForm:$src)), (i64 0))),
(v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+def : Pat<(v4f32 (build_vector (f32 fpimm0), (f32 (load ForceXForm:$src)),
+ (f32 fpimm0), (f32 fpimm0))),
+ (v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
// Elements in a register on a BE system are in order <0, 1, 2, 3>.
// The store instructions store the second word from the left.
>From f9176b94611ade46de119f3706c3393e1c0e1458 Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Wed, 29 Nov 2023 07:41:27 +0000
Subject: [PATCH 06/12] Minor
---
llvm/lib/Target/PowerPC/PPCInstrVSX.td | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 76534ded71d603a..440bfae99452c0b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2433,8 +2433,7 @@ def DblwdCmp {
class SplatAndAssignIndexed<
SDPatternOperator op,
- dag splat,
- int Total,
+ int Total, dag splat,
int Index, dag assign> {
defvar head = !listsplat(splat, Index);
defvar x = [assign];
@@ -2445,9 +2444,8 @@ class SplatAndAssignIndexed<
def BVLoadAndZeroFP : SplatAndAssignIndexed<
build_vector,
- (f64 fpimm0),
- 2, 0,
- (f64 (extloadf32 ForceXForm:$src))>;
+ 2, (f64 fpimm0),
+ 0, (f64 (extloadf32 ForceXForm:$src))>;
//---------------------------- Anonymous Patterns ----------------------------//
// Predicate combinations are kept in roughly chronological order in terms of
@@ -3456,15 +3454,13 @@ def : Pat<(store (f32 (extractelt v4f32:$A, 1)), ForceXForm:$src),
(STIWX (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>;
// BUILD_VECTOR via loads and zeros.
-//def : Pat<(v2f64 (build_vector (f64 (extloadf32 ForceXForm:$src)), (f64 fpimm0))),
-// (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
def : Pat<(v2f64 BVLoadAndZeroFP.DAG),
(v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
def : Pat<(v2i64 (build_vector (i64 (zextloadi32 ForceXForm:$src)), (i64 0))),
(v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
-def : Pat<(v4f32 (build_vector (f32 fpimm0), (f32 (load ForceXForm:$src)),
- (f32 fpimm0), (f32 fpimm0))),
- (v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+// def : Pat<(v4f32 (build_vector (f32 fpimm0), (f32 (load ForceXForm:$src)),
+// (f32 fpimm0), (f32 fpimm0))),
+// (v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
// Elements in a register on a BE system are in order <0, 1, 2, 3>.
// The store instructions store the second word from the left.
>From a6cee02cda9b18f894654cc37d6aec1785dd6240 Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Wed, 29 Nov 2023 08:20:07 +0000
Subject: [PATCH 07/12] Up
---
llvm/lib/Target/PowerPC/PPCInstrVSX.td | 30 ++++++++++++++++++------
llvm/test/CodeGen/PowerPC/vec-promote.ll | 4 +---
2 files changed, 24 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 440bfae99452c0b..97919bdfee3099f 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2438,15 +2438,30 @@ class SplatAndAssignIndexed<
defvar head = !listsplat(splat, Index);
defvar x = [assign];
defvar tail = !listsplat(splat, !sub(!sub(Total, Index), 1));
- defvar Ops = !listconcat(head, x, tail);
+ list<dag> Ops = !listconcat(head, x, tail);
dag DAG = !foldl((op), Ops, a, b, !con(a, (op b)));
}
-def BVLoadAndZeroFP : SplatAndAssignIndexed<
+def BVExtLoadAndZeroFP : SplatAndAssignIndexed<
build_vector,
2, (f64 fpimm0),
0, (f64 (extloadf32 ForceXForm:$src))>;
+def BVZExtLoadAndZeroInt : SplatAndAssignIndexed<
+ build_vector,
+ 2, (i64 0),
+ 0, (i64 (zextloadi32 ForceXForm:$src))>;
+
+def BVLoadAndZeroFP : SplatAndAssignIndexed<
+ build_vector,
+ 4, (f32 fpimm0),
+ 1, (f32 (load ForceXForm:$src))>;
+
+def BVLoadAndZeroDbl : SplatAndAssignIndexed<
+ build_vector,
+ 2, (f64 fpimm0),
+ 0, (f64 (load ForceXForm:$src))>;
+
//---------------------------- Anonymous Patterns ----------------------------//
// Predicate combinations are kept in roughly chronological order in terms of
// instruction availability in the architecture. For example, VSX came in with
@@ -3454,13 +3469,14 @@ def : Pat<(store (f32 (extractelt v4f32:$A, 1)), ForceXForm:$src),
(STIWX (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>;
// BUILD_VECTOR via loads and zeros.
-def : Pat<(v2f64 BVLoadAndZeroFP.DAG),
+def : Pat<(v2f64 BVExtLoadAndZeroFP.DAG),
(v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
-def : Pat<(v2i64 (build_vector (i64 (zextloadi32 ForceXForm:$src)), (i64 0))),
+def : Pat<(v2i64 BVZExtLoadAndZeroInt.DAG),
(v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
-// def : Pat<(v4f32 (build_vector (f32 fpimm0), (f32 (load ForceXForm:$src)),
-// (f32 fpimm0), (f32 fpimm0))),
-// (v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+def : Pat<(v4f32 BVLoadAndZeroFP.DAG),
+ (v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+def : Pat<(v2f64 BVLoadAndZeroDbl.DAG),
+ (v2f64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
// Elements in a register on a BE system are in order <0, 1, 2, 3>.
// The store instructions store the second word from the left.
diff --git a/llvm/test/CodeGen/PowerPC/vec-promote.ll b/llvm/test/CodeGen/PowerPC/vec-promote.ll
index 628c5101c079652..31a9480c4e7508e 100644
--- a/llvm/test/CodeGen/PowerPC/vec-promote.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-promote.ll
@@ -7,9 +7,7 @@
define noundef <2 x double> @vec_promote_double_zeroed(ptr nocapture noundef readonly %p) {
; CHECK-BE-LABEL: vec_promote_double_zeroed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lfd 0, 0(3)
-; CHECK-BE-NEXT: xxlxor 1, 1, 1
-; CHECK-BE-NEXT: xxmrghd 34, 0, 1
+; CHECK-BE-NEXT: lxsdx 34, 0, 3
; CHECK-BE-NEXT: blr
;
; CHECK-LE-LABEL: vec_promote_double_zeroed:
>From 40757f357c53df2027620d9ceb8e2564d88b0c70 Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Wed, 29 Nov 2023 08:36:13 +0000
Subject: [PATCH 08/12] More test change
---
.../build-vector-from-load-and-zeros.ll | 31 +++++++++++++++++--
1 file changed, 29 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
index cf4c6c61840c72b..ac75b81f62d98d0 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
@@ -2,7 +2,7 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mcpu=pwr7 < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mcpu=pwr8 < %s | FileCheck --check-prefix=PWR8 %s
-define <2 x i64> @build_v2i64(ptr nocapture noundef readonly %p, <2 x i64> noundef %a) {
+define <2 x i64> @build_v2i64(ptr nocapture noundef readonly %p) {
; CHECK-LABEL: build_v2i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lwz 3, 0(3)
@@ -24,7 +24,7 @@ entry:
ret <2 x i64> %vecinit1
}
-define <2 x double> @build_v2f64(ptr nocapture noundef readonly %p, <2 x double> noundef %a) {
+define <2 x double> @build_v2f64(ptr nocapture noundef readonly %p) {
; CHECK-LABEL: build_v2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lfs 0, 0(3)
@@ -42,3 +42,30 @@ entry:
%vecinit1 = insertelement <2 x double> <double poison, double 0.000000e+00>, double %conv, i64 0
ret <2 x double> %vecinit1
}
+
+define <4 x float> @build_v4f32(ptr nocapture noundef readonly %p) {
+; CHECK-LABEL: build_v4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lwz 3, 0(3)
+; CHECK-NEXT: li 4, 0
+; CHECK-NEXT: stw 4, -32(1)
+; CHECK-NEXT: stw 3, -16(1)
+; CHECK-NEXT: addis 3, 2, .LCPI2_0 at toc@ha
+; CHECK-NEXT: addi 3, 3, .LCPI2_0 at toc@l
+; CHECK-NEXT: lxvw4x 34, 0, 3
+; CHECK-NEXT: addi 3, 1, -32
+; CHECK-NEXT: lxvw4x 35, 0, 3
+; CHECK-NEXT: addi 3, 1, -16
+; CHECK-NEXT: lxvw4x 36, 0, 3
+; CHECK-NEXT: vperm 2, 3, 4, 2
+; CHECK-NEXT: blr
+;
+; PWR8-LABEL: build_v4f32:
+; PWR8: # %bb.0: # %entry
+; PWR8-NEXT: lxsiwzx 34, 0, 3
+; PWR8-NEXT: blr
+entry:
+ %0 = load float, ptr %p, align 4
+ %vecinit1 = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00,float 0.000000e+00,float 0.000000e+00>, float %0, i64 1
+ ret <4 x float> %vecinit1
+}
>From 5ae66707fdc0a59e681d920eb490ad6a45b31261 Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Wed, 29 Nov 2023 08:37:11 +0000
Subject: [PATCH 09/12] Minor
---
llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
index ac75b81f62d98d0..b572f94377db1ff 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
@@ -66,6 +66,6 @@ define <4 x float> @build_v4f32(ptr nocapture noundef readonly %p) {
; PWR8-NEXT: blr
entry:
%0 = load float, ptr %p, align 4
- %vecinit1 = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00,float 0.000000e+00,float 0.000000e+00>, float %0, i64 1
+ %vecinit1 = insertelement <4 x float> <float 0.000000e+00, float poison, float 0.000000e+00, float 0.000000e+00>, float %0, i64 1
ret <4 x float> %vecinit1
}
>From f6cdbbdafc095f4753b92d94eb892b6f9e6a684c Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Wed, 29 Nov 2023 09:06:42 +0000
Subject: [PATCH 10/12] Basic cases
---
llvm/lib/Target/PowerPC/PPCInstrVSX.td | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 97919bdfee3099f..316038a77b08e44 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2457,11 +2457,21 @@ def BVLoadAndZeroFP : SplatAndAssignIndexed<
4, (f32 fpimm0),
1, (f32 (load ForceXForm:$src))>;
+def BVLoadAndZeroInt : SplatAndAssignIndexed<
+ build_vector,
+ 4, (i32 0),
+ 1, (i32 (load ForceXForm:$src))>;
+
def BVLoadAndZeroDbl : SplatAndAssignIndexed<
build_vector,
2, (f64 fpimm0),
0, (f64 (load ForceXForm:$src))>;
+def BVLoadAndZeroLong : SplatAndAssignIndexed<
+ build_vector,
+ 2, (i64 0),
+ 0, (i64 (load ForceXForm:$src))>;
+
//---------------------------- Anonymous Patterns ----------------------------//
// Predicate combinations are kept in roughly chronological order in terms of
// instruction availability in the architecture. For example, VSX came in with
@@ -3469,14 +3479,20 @@ def : Pat<(store (f32 (extractelt v4f32:$A, 1)), ForceXForm:$src),
(STIWX (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>;
// BUILD_VECTOR via loads and zeros.
+// Extension load.
def : Pat<(v2f64 BVExtLoadAndZeroFP.DAG),
(v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
def : Pat<(v2i64 BVZExtLoadAndZeroInt.DAG),
(v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+// Normal load.
def : Pat<(v4f32 BVLoadAndZeroFP.DAG),
(v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+def : Pat<(v4i32 BVLoadAndZeroInt.DAG),
+ (v4i32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
def : Pat<(v2f64 BVLoadAndZeroDbl.DAG),
(v2f64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
+def : Pat<(v2i64 BVLoadAndZeroLong.DAG),
+ (v2i64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
// Elements in a register on a BE system are in order <0, 1, 2, 3>.
// The store instructions store the second word from the left.
>From 9628746c89c46207ce7f1070af513b56e9ab81a0 Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Wed, 29 Nov 2023 09:07:28 +0000
Subject: [PATCH 11/12] Up
---
llvm/test/CodeGen/PowerPC/vec-promote.ll | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/llvm/test/CodeGen/PowerPC/vec-promote.ll b/llvm/test/CodeGen/PowerPC/vec-promote.ll
index 31a9480c4e7508e..d8c541c48db5255 100644
--- a/llvm/test/CodeGen/PowerPC/vec-promote.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-promote.ll
@@ -87,11 +87,7 @@ entry:
define noundef <2 x i64> @vec_promote_long_long_zeroed(ptr nocapture noundef readonly %p) {
; CHECK-BE-LABEL: vec_promote_long_long_zeroed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: ld 3, 0(3)
-; CHECK-BE-NEXT: li 4, 0
-; CHECK-BE-NEXT: mtfprd 0, 4
-; CHECK-BE-NEXT: mtfprd 1, 3
-; CHECK-BE-NEXT: xxmrghd 34, 1, 0
+; CHECK-BE-NEXT: lxsdx 34, 0, 3
; CHECK-BE-NEXT: blr
;
; CHECK-LE-LABEL: vec_promote_long_long_zeroed:
>From e512da698869072fb34755a974a7a256f4b1e94a Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Wed, 29 Nov 2023 09:12:21 +0000
Subject: [PATCH 12/12] Minor
---
llvm/lib/Target/PowerPC/PPCInstrVSX.td | 26 +++++++++++++-------------
1 file changed, 13 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 316038a77b08e44..945b8e601b4aee7 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2442,32 +2442,32 @@ class SplatAndAssignIndexed<
dag DAG = !foldl((op), Ops, a, b, !con(a, (op b)));
}
-def BVExtLoadAndZeroFP : SplatAndAssignIndexed<
+def BVExtLoadAndZerosFP : SplatAndAssignIndexed<
build_vector,
2, (f64 fpimm0),
0, (f64 (extloadf32 ForceXForm:$src))>;
-def BVZExtLoadAndZeroInt : SplatAndAssignIndexed<
+def BVZExtLoadAndZerosInt : SplatAndAssignIndexed<
build_vector,
2, (i64 0),
0, (i64 (zextloadi32 ForceXForm:$src))>;
-def BVLoadAndZeroFP : SplatAndAssignIndexed<
+def BVLoadAndZerosFP : SplatAndAssignIndexed<
build_vector,
4, (f32 fpimm0),
1, (f32 (load ForceXForm:$src))>;
-def BVLoadAndZeroInt : SplatAndAssignIndexed<
+def BVLoadAndZerosInt : SplatAndAssignIndexed<
build_vector,
4, (i32 0),
1, (i32 (load ForceXForm:$src))>;
-def BVLoadAndZeroDbl : SplatAndAssignIndexed<
+def BVLoadAndZerosDbl : SplatAndAssignIndexed<
build_vector,
2, (f64 fpimm0),
0, (f64 (load ForceXForm:$src))>;
-def BVLoadAndZeroLong : SplatAndAssignIndexed<
+def BVLoadAndZerosLong : SplatAndAssignIndexed<
build_vector,
2, (i64 0),
0, (i64 (load ForceXForm:$src))>;
@@ -3478,20 +3478,20 @@ def : Pat<(store (i32 (extractelt v4i32:$A, 1)), ForceXForm:$src),
def : Pat<(store (f32 (extractelt v4f32:$A, 1)), ForceXForm:$src),
(STIWX (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>;
-// BUILD_VECTOR via loads and zeros.
+// BUILD_VECTOR via single load and zeros.
// Extension load.
-def : Pat<(v2f64 BVExtLoadAndZeroFP.DAG),
+def : Pat<(v2f64 BVExtLoadAndZerosFP.DAG),
(v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
-def : Pat<(v2i64 BVZExtLoadAndZeroInt.DAG),
+def : Pat<(v2i64 BVZExtLoadAndZerosInt.DAG),
(v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
// Normal load.
-def : Pat<(v4f32 BVLoadAndZeroFP.DAG),
+def : Pat<(v4f32 BVLoadAndZerosFP.DAG),
(v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
-def : Pat<(v4i32 BVLoadAndZeroInt.DAG),
+def : Pat<(v4i32 BVLoadAndZerosInt.DAG),
(v4i32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
-def : Pat<(v2f64 BVLoadAndZeroDbl.DAG),
+def : Pat<(v2f64 BVLoadAndZerosDbl.DAG),
(v2f64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
-def : Pat<(v2i64 BVLoadAndZeroLong.DAG),
+def : Pat<(v2i64 BVLoadAndZerosLong.DAG),
(v2i64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
// Elements in a register on a BE system are in order <0, 1, 2, 3>.
More information about the llvm-commits mailing list