[llvm] [PowerPC] Optimize BUILD_VECTOR from load and zeros (PR #73609)

Kai Luo via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 29 01:16:59 PST 2023


https://github.com/bzEq updated https://github.com/llvm/llvm-project/pull/73609

>From 587f8e153f13c4f44baa876a0f48f89056c29432 Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Tue, 28 Nov 2023 03:18:08 +0000
Subject: [PATCH 01/12] Up

---
 llvm/lib/Target/PowerPC/PPCInstrVSX.td | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 0e5f6b773bb5441..cc0009ff69e7d37 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -3403,6 +3403,12 @@ def : Pat<(f32 (vector_extract v4f32:$S, i32:$Idx)),
 def : Pat<(f64 (vector_extract v2f64:$S, i32:$Idx)),
           (f64 VectorExtractions.BE_32B_VARIABLE_DOUBLE)>;
 
+def : Pat<(v2f64 (build_vector (f64 (extloadf32 ForceXForm:$src)), (f64 fpimm0))),
+          (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
+def : Pat<(v2i64 (build_vector (i64 (zextloadi32 ForceXForm:$src)), (i64 0))),
+          (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+
+
 defm : ScalToVecWPermute<
   v4i32, (i32 (load ForceXForm:$src)),
   (XXSLDWIs (LIWZX ForceXForm:$src), 1),

>From ea7eb55adfb1be9f76909b7e076c13930bc97e69 Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Tue, 28 Nov 2023 03:18:46 +0000
Subject: [PATCH 02/12] Format

---
 llvm/lib/Target/PowerPC/PPCInstrVSX.td | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index cc0009ff69e7d37..e6a7f240b265597 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -3405,6 +3405,7 @@ def : Pat<(f64 (vector_extract v2f64:$S, i32:$Idx)),
 
 def : Pat<(v2f64 (build_vector (f64 (extloadf32 ForceXForm:$src)), (f64 fpimm0))),
           (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
+
 def : Pat<(v2i64 (build_vector (i64 (zextloadi32 ForceXForm:$src)), (i64 0))),
           (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
 

>From 8ac488546ba9cfcb7db34530f7f4158ddb08051c Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Tue, 28 Nov 2023 03:29:22 +0000
Subject: [PATCH 03/12] Update test

---
 .../PowerPC/build-vector-from-load-and-zeros.ll        | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
index 16c2617b3564931..cf4c6c61840c72b 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
@@ -15,11 +15,7 @@ define  <2 x i64> @build_v2i64(ptr nocapture noundef readonly %p, <2 x i64> noun
 ;
 ; PWR8-LABEL: build_v2i64:
 ; PWR8:       # %bb.0: # %entry
-; PWR8-NEXT:    lwz 3, 0(3)
-; PWR8-NEXT:    li 4, 0
-; PWR8-NEXT:    mtfprd 0, 4
-; PWR8-NEXT:    mtfprd 1, 3
-; PWR8-NEXT:    xxmrghd 34, 1, 0
+; PWR8-NEXT:    lxsiwzx 34, 0, 3
 ; PWR8-NEXT:    blr
 entry:
   %0 = load i32, ptr %p, align 4
@@ -38,9 +34,7 @@ define <2 x double> @build_v2f64(ptr nocapture noundef readonly %p, <2 x double>
 ;
 ; PWR8-LABEL: build_v2f64:
 ; PWR8:       # %bb.0: # %entry
-; PWR8-NEXT:    lfs 0, 0(3)
-; PWR8-NEXT:    xxlxor 1, 1, 1
-; PWR8-NEXT:    xxmrghd 34, 0, 1
+; PWR8-NEXT:    lxsspx 34, 0, 3
 ; PWR8-NEXT:    blr
 entry:
   %0 = load float, ptr %p, align 4

>From 894e99867c719d75a4eb4e7fd58b37a3d13446a3 Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Tue, 28 Nov 2023 05:12:54 +0000
Subject: [PATCH 04/12] Minor

---
 llvm/lib/Target/PowerPC/PPCInstrVSX.td | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index e6a7f240b265597..c1079966ff3a2e6 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -3403,13 +3403,6 @@ def : Pat<(f32 (vector_extract v4f32:$S, i32:$Idx)),
 def : Pat<(f64 (vector_extract v2f64:$S, i32:$Idx)),
           (f64 VectorExtractions.BE_32B_VARIABLE_DOUBLE)>;
 
-def : Pat<(v2f64 (build_vector (f64 (extloadf32 ForceXForm:$src)), (f64 fpimm0))),
-          (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
-
-def : Pat<(v2i64 (build_vector (i64 (zextloadi32 ForceXForm:$src)), (i64 0))),
-          (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
-
-
 defm : ScalToVecWPermute<
   v4i32, (i32 (load ForceXForm:$src)),
   (XXSLDWIs (LIWZX ForceXForm:$src), 1),
@@ -3444,6 +3437,12 @@ def : Pat<(store (i32 (extractelt v4i32:$A, 1)), ForceXForm:$src),
 def : Pat<(store (f32 (extractelt v4f32:$A, 1)), ForceXForm:$src),
           (STIWX (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>;
 
+// BUILD_VECTOR via loads and zeros.
+def : Pat<(v2f64 (build_vector (f64 (extloadf32 ForceXForm:$src)), (f64 fpimm0))),
+          (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
+def : Pat<(v2i64 (build_vector (i64 (zextloadi32 ForceXForm:$src)), (i64 0))),
+          (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+
 // Elements in a register on a BE system are in order <0, 1, 2, 3>.
 // The store instructions store the second word from the left.
 // So to align element zero, we need to modulo-left-shift by 3 words.

>From 2361068029688d69981057d7673a0ed8c69e7eae Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Wed, 29 Nov 2023 07:37:33 +0000
Subject: [PATCH 05/12] Use general DAG

---
 llvm/lib/Target/PowerPC/PPCInstrVSX.td | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index c1079966ff3a2e6..76534ded71d603a 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2431,6 +2431,24 @@ def DblwdCmp {
                                (v2i64 (XXSPLTW EQWSHAND, 2)), 0));
 }
 
+class SplatAndAssignIndexed<
+      SDPatternOperator op,
+      dag splat,
+      int Total,
+      int Index, dag assign> {
+  defvar head = !listsplat(splat, Index);
+  defvar x = [assign];
+  defvar tail = !listsplat(splat, !sub(!sub(Total, Index), 1));
+  defvar Ops = !listconcat(head, x, tail);
+  dag DAG = !foldl((op), Ops, a, b, !con(a, (op b)));
+}
+
+def BVLoadAndZeroFP : SplatAndAssignIndexed<
+    build_vector,
+    (f64 fpimm0),
+    2, 0,
+    (f64 (extloadf32 ForceXForm:$src))>;
+
 //---------------------------- Anonymous Patterns ----------------------------//
 // Predicate combinations are kept in roughly chronological order in terms of
 // instruction availability in the architecture. For example, VSX came in with
@@ -3438,10 +3456,15 @@ def : Pat<(store (f32 (extractelt v4f32:$A, 1)), ForceXForm:$src),
           (STIWX (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>;
 
 // BUILD_VECTOR via loads and zeros.
-def : Pat<(v2f64 (build_vector (f64 (extloadf32 ForceXForm:$src)), (f64 fpimm0))),
+//def : Pat<(v2f64 (build_vector (f64 (extloadf32 ForceXForm:$src)), (f64 fpimm0))),
+//          (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
+def : Pat<(v2f64 BVLoadAndZeroFP.DAG),
           (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
 def : Pat<(v2i64 (build_vector (i64 (zextloadi32 ForceXForm:$src)), (i64 0))),
           (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+def : Pat<(v4f32 (build_vector (f32 fpimm0), (f32 (load ForceXForm:$src)),
+                               (f32 fpimm0), (f32 fpimm0))),
+          (v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
 
 // Elements in a register on a BE system are in order <0, 1, 2, 3>.
 // The store instructions store the second word from the left.

>From f9176b94611ade46de119f3706c3393e1c0e1458 Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Wed, 29 Nov 2023 07:41:27 +0000
Subject: [PATCH 06/12] Minor

---
 llvm/lib/Target/PowerPC/PPCInstrVSX.td | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 76534ded71d603a..440bfae99452c0b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2433,8 +2433,7 @@ def DblwdCmp {
 
 class SplatAndAssignIndexed<
       SDPatternOperator op,
-      dag splat,
-      int Total,
+      int Total, dag splat,
       int Index, dag assign> {
   defvar head = !listsplat(splat, Index);
   defvar x = [assign];
@@ -2445,9 +2444,8 @@ class SplatAndAssignIndexed<
 
 def BVLoadAndZeroFP : SplatAndAssignIndexed<
     build_vector,
-    (f64 fpimm0),
-    2, 0,
-    (f64 (extloadf32 ForceXForm:$src))>;
+    2, (f64 fpimm0),
+    0, (f64 (extloadf32 ForceXForm:$src))>;
 
 //---------------------------- Anonymous Patterns ----------------------------//
 // Predicate combinations are kept in roughly chronological order in terms of
@@ -3456,15 +3454,13 @@ def : Pat<(store (f32 (extractelt v4f32:$A, 1)), ForceXForm:$src),
           (STIWX (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>;
 
 // BUILD_VECTOR via loads and zeros.
-//def : Pat<(v2f64 (build_vector (f64 (extloadf32 ForceXForm:$src)), (f64 fpimm0))),
-//          (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
 def : Pat<(v2f64 BVLoadAndZeroFP.DAG),
           (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
 def : Pat<(v2i64 (build_vector (i64 (zextloadi32 ForceXForm:$src)), (i64 0))),
           (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
-def : Pat<(v4f32 (build_vector (f32 fpimm0), (f32 (load ForceXForm:$src)),
-                               (f32 fpimm0), (f32 fpimm0))),
-          (v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+// def : Pat<(v4f32 (build_vector (f32 fpimm0), (f32 (load ForceXForm:$src)),
+//                                (f32 fpimm0), (f32 fpimm0))),
+//           (v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
 
 // Elements in a register on a BE system are in order <0, 1, 2, 3>.
 // The store instructions store the second word from the left.

>From a6cee02cda9b18f894654cc37d6aec1785dd6240 Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Wed, 29 Nov 2023 08:20:07 +0000
Subject: [PATCH 07/12] Up

---
 llvm/lib/Target/PowerPC/PPCInstrVSX.td   | 30 ++++++++++++++++++------
 llvm/test/CodeGen/PowerPC/vec-promote.ll |  4 +---
 2 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 440bfae99452c0b..97919bdfee3099f 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2438,15 +2438,30 @@ class SplatAndAssignIndexed<
   defvar head = !listsplat(splat, Index);
   defvar x = [assign];
   defvar tail = !listsplat(splat, !sub(!sub(Total, Index), 1));
-  defvar Ops = !listconcat(head, x, tail);
+  list<dag> Ops = !listconcat(head, x, tail);
   dag DAG = !foldl((op), Ops, a, b, !con(a, (op b)));
 }
 
-def BVLoadAndZeroFP : SplatAndAssignIndexed<
+def BVExtLoadAndZeroFP : SplatAndAssignIndexed<
     build_vector,
     2, (f64 fpimm0),
     0, (f64 (extloadf32 ForceXForm:$src))>;
 
+def BVZExtLoadAndZeroInt : SplatAndAssignIndexed<
+    build_vector,
+    2, (i64 0),
+    0, (i64 (zextloadi32 ForceXForm:$src))>;
+
+def BVLoadAndZeroFP : SplatAndAssignIndexed<
+    build_vector,
+    4, (f32 fpimm0),
+    1, (f32 (load ForceXForm:$src))>;
+
+def BVLoadAndZeroDbl : SplatAndAssignIndexed<
+    build_vector,
+    2, (f64 fpimm0),
+    0, (f64 (load ForceXForm:$src))>;
+
 //---------------------------- Anonymous Patterns ----------------------------//
 // Predicate combinations are kept in roughly chronological order in terms of
 // instruction availability in the architecture. For example, VSX came in with
@@ -3454,13 +3469,14 @@ def : Pat<(store (f32 (extractelt v4f32:$A, 1)), ForceXForm:$src),
           (STIWX (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>;
 
 // BUILD_VECTOR via loads and zeros.
-def : Pat<(v2f64 BVLoadAndZeroFP.DAG),
+def : Pat<(v2f64 BVExtLoadAndZeroFP.DAG),
           (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
-def : Pat<(v2i64 (build_vector (i64 (zextloadi32 ForceXForm:$src)), (i64 0))),
+def : Pat<(v2i64 BVZExtLoadAndZeroInt.DAG),
           (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
-// def : Pat<(v4f32 (build_vector (f32 fpimm0), (f32 (load ForceXForm:$src)),
-//                                (f32 fpimm0), (f32 fpimm0))),
-//           (v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+def : Pat<(v4f32 BVLoadAndZeroFP.DAG),
+          (v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+def : Pat<(v2f64 BVLoadAndZeroDbl.DAG),
+          (v2f64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
 
 // Elements in a register on a BE system are in order <0, 1, 2, 3>.
 // The store instructions store the second word from the left.
diff --git a/llvm/test/CodeGen/PowerPC/vec-promote.ll b/llvm/test/CodeGen/PowerPC/vec-promote.ll
index 628c5101c079652..31a9480c4e7508e 100644
--- a/llvm/test/CodeGen/PowerPC/vec-promote.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-promote.ll
@@ -7,9 +7,7 @@
 define noundef <2 x double> @vec_promote_double_zeroed(ptr nocapture noundef readonly %p) {
 ; CHECK-BE-LABEL: vec_promote_double_zeroed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lfd 0, 0(3)
-; CHECK-BE-NEXT:    xxlxor 1, 1, 1
-; CHECK-BE-NEXT:    xxmrghd 34, 0, 1
+; CHECK-BE-NEXT:    lxsdx 34, 0, 3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: vec_promote_double_zeroed:

>From 40757f357c53df2027620d9ceb8e2564d88b0c70 Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Wed, 29 Nov 2023 08:36:13 +0000
Subject: [PATCH 08/12] More test change

---
 .../build-vector-from-load-and-zeros.ll       | 31 +++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
index cf4c6c61840c72b..ac75b81f62d98d0 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mcpu=pwr7 < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mcpu=pwr8 < %s | FileCheck --check-prefix=PWR8 %s
 
-define  <2 x i64> @build_v2i64(ptr nocapture noundef readonly %p, <2 x i64> noundef %a) {
+define  <2 x i64> @build_v2i64(ptr nocapture noundef readonly %p) {
 ; CHECK-LABEL: build_v2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    lwz 3, 0(3)
@@ -24,7 +24,7 @@ entry:
   ret <2 x i64> %vecinit1
 }
 
-define <2 x double> @build_v2f64(ptr nocapture noundef readonly %p, <2 x double> noundef %a) {
+define <2 x double> @build_v2f64(ptr nocapture noundef readonly %p) {
 ; CHECK-LABEL: build_v2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    lfs 0, 0(3)
@@ -42,3 +42,30 @@ entry:
   %vecinit1 = insertelement <2 x double> <double poison, double 0.000000e+00>, double %conv, i64 0
   ret <2 x double> %vecinit1
 }
+
+define <4 x float> @build_v4f32(ptr nocapture noundef readonly %p) {
+; CHECK-LABEL: build_v4f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lwz 3, 0(3)
+; CHECK-NEXT:    li 4, 0
+; CHECK-NEXT:    stw 4, -32(1)
+; CHECK-NEXT:    stw 3, -16(1)
+; CHECK-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
+; CHECK-NEXT:    addi 3, 3, .LCPI2_0@toc@l
+; CHECK-NEXT:    lxvw4x 34, 0, 3
+; CHECK-NEXT:    addi 3, 1, -32
+; CHECK-NEXT:    lxvw4x 35, 0, 3
+; CHECK-NEXT:    addi 3, 1, -16
+; CHECK-NEXT:    lxvw4x 36, 0, 3
+; CHECK-NEXT:    vperm 2, 3, 4, 2
+; CHECK-NEXT:    blr
+;
+; PWR8-LABEL: build_v4f32:
+; PWR8:       # %bb.0: # %entry
+; PWR8-NEXT:    lxsiwzx 34, 0, 3
+; PWR8-NEXT:    blr
+entry:
+  %0 = load float, ptr %p, align 4
+  %vecinit1 = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00,float 0.000000e+00,float 0.000000e+00>, float %0, i64 1
+  ret <4 x float> %vecinit1
+}

>From 5ae66707fdc0a59e681d920eb490ad6a45b31261 Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Wed, 29 Nov 2023 08:37:11 +0000
Subject: [PATCH 09/12] Minor

---
 llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
index ac75b81f62d98d0..b572f94377db1ff 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
@@ -66,6 +66,6 @@ define <4 x float> @build_v4f32(ptr nocapture noundef readonly %p) {
 ; PWR8-NEXT:    blr
 entry:
   %0 = load float, ptr %p, align 4
-  %vecinit1 = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00,float 0.000000e+00,float 0.000000e+00>, float %0, i64 1
+  %vecinit1 = insertelement <4 x float> <float 0.000000e+00, float poison, float 0.000000e+00, float 0.000000e+00>, float %0, i64 1
   ret <4 x float> %vecinit1
 }

>From f6cdbbdafc095f4753b92d94eb892b6f9e6a684c Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Wed, 29 Nov 2023 09:06:42 +0000
Subject: [PATCH 10/12] Basic cases

---
 llvm/lib/Target/PowerPC/PPCInstrVSX.td | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 97919bdfee3099f..316038a77b08e44 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2457,11 +2457,21 @@ def BVLoadAndZeroFP : SplatAndAssignIndexed<
     4, (f32 fpimm0),
     1, (f32 (load ForceXForm:$src))>;
 
+def BVLoadAndZeroInt : SplatAndAssignIndexed<
+    build_vector,
+    4, (i32 0),
+    1, (i32 (load ForceXForm:$src))>;
+
 def BVLoadAndZeroDbl : SplatAndAssignIndexed<
     build_vector,
     2, (f64 fpimm0),
     0, (f64 (load ForceXForm:$src))>;
 
+def BVLoadAndZeroLong : SplatAndAssignIndexed<
+    build_vector,
+    2, (i64 0),
+    0, (i64 (load ForceXForm:$src))>;
+
 //---------------------------- Anonymous Patterns ----------------------------//
 // Predicate combinations are kept in roughly chronological order in terms of
 // instruction availability in the architecture. For example, VSX came in with
@@ -3469,14 +3479,20 @@ def : Pat<(store (f32 (extractelt v4f32:$A, 1)), ForceXForm:$src),
           (STIWX (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>;
 
 // BUILD_VECTOR via loads and zeros.
+// Extension load.
 def : Pat<(v2f64 BVExtLoadAndZeroFP.DAG),
           (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
 def : Pat<(v2i64 BVZExtLoadAndZeroInt.DAG),
           (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+// Normal load.
 def : Pat<(v4f32 BVLoadAndZeroFP.DAG),
           (v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+def : Pat<(v4i32 BVLoadAndZeroInt.DAG),
+          (v4i32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
 def : Pat<(v2f64 BVLoadAndZeroDbl.DAG),
           (v2f64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
+def : Pat<(v2i64 BVLoadAndZeroLong.DAG),
+          (v2i64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
 
 // Elements in a register on a BE system are in order <0, 1, 2, 3>.
 // The store instructions store the second word from the left.

>From 9628746c89c46207ce7f1070af513b56e9ab81a0 Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Wed, 29 Nov 2023 09:07:28 +0000
Subject: [PATCH 11/12] Up

---
 llvm/test/CodeGen/PowerPC/vec-promote.ll | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/llvm/test/CodeGen/PowerPC/vec-promote.ll b/llvm/test/CodeGen/PowerPC/vec-promote.ll
index 31a9480c4e7508e..d8c541c48db5255 100644
--- a/llvm/test/CodeGen/PowerPC/vec-promote.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-promote.ll
@@ -87,11 +87,7 @@ entry:
 define noundef <2 x i64> @vec_promote_long_long_zeroed(ptr nocapture noundef readonly %p) {
 ; CHECK-BE-LABEL: vec_promote_long_long_zeroed:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    ld 3, 0(3)
-; CHECK-BE-NEXT:    li 4, 0
-; CHECK-BE-NEXT:    mtfprd 0, 4
-; CHECK-BE-NEXT:    mtfprd 1, 3
-; CHECK-BE-NEXT:    xxmrghd 34, 1, 0
+; CHECK-BE-NEXT:    lxsdx 34, 0, 3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: vec_promote_long_long_zeroed:

>From e512da698869072fb34755a974a7a256f4b1e94a Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Wed, 29 Nov 2023 09:12:21 +0000
Subject: [PATCH 12/12] Minor

---
 llvm/lib/Target/PowerPC/PPCInstrVSX.td | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 316038a77b08e44..945b8e601b4aee7 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2442,32 +2442,32 @@ class SplatAndAssignIndexed<
   dag DAG = !foldl((op), Ops, a, b, !con(a, (op b)));
 }
 
-def BVExtLoadAndZeroFP : SplatAndAssignIndexed<
+def BVExtLoadAndZerosFP : SplatAndAssignIndexed<
     build_vector,
     2, (f64 fpimm0),
     0, (f64 (extloadf32 ForceXForm:$src))>;
 
-def BVZExtLoadAndZeroInt : SplatAndAssignIndexed<
+def BVZExtLoadAndZerosInt : SplatAndAssignIndexed<
     build_vector,
     2, (i64 0),
     0, (i64 (zextloadi32 ForceXForm:$src))>;
 
-def BVLoadAndZeroFP : SplatAndAssignIndexed<
+def BVLoadAndZerosFP : SplatAndAssignIndexed<
     build_vector,
     4, (f32 fpimm0),
     1, (f32 (load ForceXForm:$src))>;
 
-def BVLoadAndZeroInt : SplatAndAssignIndexed<
+def BVLoadAndZerosInt : SplatAndAssignIndexed<
     build_vector,
     4, (i32 0),
     1, (i32 (load ForceXForm:$src))>;
 
-def BVLoadAndZeroDbl : SplatAndAssignIndexed<
+def BVLoadAndZerosDbl : SplatAndAssignIndexed<
     build_vector,
     2, (f64 fpimm0),
     0, (f64 (load ForceXForm:$src))>;
 
-def BVLoadAndZeroLong : SplatAndAssignIndexed<
+def BVLoadAndZerosLong : SplatAndAssignIndexed<
     build_vector,
     2, (i64 0),
     0, (i64 (load ForceXForm:$src))>;
@@ -3478,20 +3478,20 @@ def : Pat<(store (i32 (extractelt v4i32:$A, 1)), ForceXForm:$src),
 def : Pat<(store (f32 (extractelt v4f32:$A, 1)), ForceXForm:$src),
           (STIWX (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>;
 
-// BUILD_VECTOR via loads and zeros.
+// BUILD_VECTOR via single load and zeros.
 // Extension load.
-def : Pat<(v2f64 BVExtLoadAndZeroFP.DAG),
+def : Pat<(v2f64 BVExtLoadAndZerosFP.DAG),
           (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
-def : Pat<(v2i64 BVZExtLoadAndZeroInt.DAG),
+def : Pat<(v2i64 BVZExtLoadAndZerosInt.DAG),
           (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
 // Normal load.
-def : Pat<(v4f32 BVLoadAndZeroFP.DAG),
+def : Pat<(v4f32 BVLoadAndZerosFP.DAG),
           (v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
-def : Pat<(v4i32 BVLoadAndZeroInt.DAG),
+def : Pat<(v4i32 BVLoadAndZerosInt.DAG),
           (v4i32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
-def : Pat<(v2f64 BVLoadAndZeroDbl.DAG),
+def : Pat<(v2f64 BVLoadAndZerosDbl.DAG),
           (v2f64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
-def : Pat<(v2i64 BVLoadAndZeroLong.DAG),
+def : Pat<(v2i64 BVLoadAndZerosLong.DAG),
           (v2i64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
 
 // Elements in a register on a BE system are in order <0, 1, 2, 3>.



More information about the llvm-commits mailing list