[llvm] [PowerPC] Optimize BUILD_VECTOR from load and zeros (PR #73609)

Kai Luo via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 27 21:13:05 PST 2023


https://github.com/bzEq updated https://github.com/llvm/llvm-project/pull/73609

>From 587f8e153f13c4f44baa876a0f48f89056c29432 Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Tue, 28 Nov 2023 03:18:08 +0000
Subject: [PATCH 1/4] Up

---
 llvm/lib/Target/PowerPC/PPCInstrVSX.td | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 0e5f6b773bb5441..cc0009ff69e7d37 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -3403,6 +3403,12 @@ def : Pat<(f32 (vector_extract v4f32:$S, i32:$Idx)),
 def : Pat<(f64 (vector_extract v2f64:$S, i32:$Idx)),
           (f64 VectorExtractions.BE_32B_VARIABLE_DOUBLE)>;
 
+def : Pat<(v2f64 (build_vector (f64 (extloadf32 ForceXForm:$src)), (f64 fpimm0))),
+          (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
+def : Pat<(v2i64 (build_vector (i64 (zextloadi32 ForceXForm:$src)), (i64 0))),
+          (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+
+
 defm : ScalToVecWPermute<
   v4i32, (i32 (load ForceXForm:$src)),
   (XXSLDWIs (LIWZX ForceXForm:$src), 1),

>From ea7eb55adfb1be9f76909b7e076c13930bc97e69 Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Tue, 28 Nov 2023 03:18:46 +0000
Subject: [PATCH 2/4] Format

---
 llvm/lib/Target/PowerPC/PPCInstrVSX.td | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index cc0009ff69e7d37..e6a7f240b265597 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -3405,6 +3405,7 @@ def : Pat<(f64 (vector_extract v2f64:$S, i32:$Idx)),
 
 def : Pat<(v2f64 (build_vector (f64 (extloadf32 ForceXForm:$src)), (f64 fpimm0))),
           (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
+
 def : Pat<(v2i64 (build_vector (i64 (zextloadi32 ForceXForm:$src)), (i64 0))),
           (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
 

>From 8ac488546ba9cfcb7db34530f7f4158ddb08051c Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Tue, 28 Nov 2023 03:29:22 +0000
Subject: [PATCH 3/4] Update test

---
 .../PowerPC/build-vector-from-load-and-zeros.ll        | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
index 16c2617b3564931..cf4c6c61840c72b 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
@@ -15,11 +15,7 @@ define  <2 x i64> @build_v2i64(ptr nocapture noundef readonly %p, <2 x i64> noun
 ;
 ; PWR8-LABEL: build_v2i64:
 ; PWR8:       # %bb.0: # %entry
-; PWR8-NEXT:    lwz 3, 0(3)
-; PWR8-NEXT:    li 4, 0
-; PWR8-NEXT:    mtfprd 0, 4
-; PWR8-NEXT:    mtfprd 1, 3
-; PWR8-NEXT:    xxmrghd 34, 1, 0
+; PWR8-NEXT:    lxsiwzx 34, 0, 3
 ; PWR8-NEXT:    blr
 entry:
   %0 = load i32, ptr %p, align 4
@@ -38,9 +34,7 @@ define <2 x double> @build_v2f64(ptr nocapture noundef readonly %p, <2 x double>
 ;
 ; PWR8-LABEL: build_v2f64:
 ; PWR8:       # %bb.0: # %entry
-; PWR8-NEXT:    lfs 0, 0(3)
-; PWR8-NEXT:    xxlxor 1, 1, 1
-; PWR8-NEXT:    xxmrghd 34, 0, 1
+; PWR8-NEXT:    lxsspx 34, 0, 3
 ; PWR8-NEXT:    blr
 entry:
   %0 = load float, ptr %p, align 4

>From 894e99867c719d75a4eb4e7fd58b37a3d13446a3 Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Tue, 28 Nov 2023 05:12:54 +0000
Subject: [PATCH 4/4] Minor

---
 llvm/lib/Target/PowerPC/PPCInstrVSX.td | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index e6a7f240b265597..c1079966ff3a2e6 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -3403,13 +3403,6 @@ def : Pat<(f32 (vector_extract v4f32:$S, i32:$Idx)),
 def : Pat<(f64 (vector_extract v2f64:$S, i32:$Idx)),
           (f64 VectorExtractions.BE_32B_VARIABLE_DOUBLE)>;
 
-def : Pat<(v2f64 (build_vector (f64 (extloadf32 ForceXForm:$src)), (f64 fpimm0))),
-          (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
-
-def : Pat<(v2i64 (build_vector (i64 (zextloadi32 ForceXForm:$src)), (i64 0))),
-          (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
-
-
 defm : ScalToVecWPermute<
   v4i32, (i32 (load ForceXForm:$src)),
   (XXSLDWIs (LIWZX ForceXForm:$src), 1),
@@ -3444,6 +3437,12 @@ def : Pat<(store (i32 (extractelt v4i32:$A, 1)), ForceXForm:$src),
 def : Pat<(store (f32 (extractelt v4f32:$A, 1)), ForceXForm:$src),
           (STIWX (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>;
 
+// BUILD_VECTOR of <extending scalar load, zero>: select one VSX scalar load.
+def : Pat<(v2f64 (build_vector (f64 (extloadf32 ForceXForm:$src)), (f64 fpimm0))),
+          (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
+def : Pat<(v2i64 (build_vector (i64 (zextloadi32 ForceXForm:$src)), (i64 0))),
+          (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+
 // Elements in a register on a BE system are in order <0, 1, 2, 3>.
 // The store instructions store the second word from the left.
 // So to align element zero, we need to modulo-left-shift by 3 words.



More information about the llvm-commits mailing list