[llvm] [PowerPC] Hoist xxspltib out of the loop (PR #127121)

zhijian lin via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 21 08:36:46 PST 2025


https://github.com/diggerlin updated https://github.com/llvm/llvm-project/pull/127121

>From aedecc564adff9e09eb11f0035178882413afc6f Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Fri, 21 Feb 2025 16:52:50 +0000
Subject: [PATCH] [PowerPC] hoist xxspltib out of the loop

---
 llvm/lib/Target/PowerPC/PPCInstrVSX.td    |  2 -
 llvm/test/CodeGen/PowerPC/licm-xxsplti.ll | 62 +++++++++++------------
 llvm/test/CodeGen/PowerPC/memset-tail.ll  | 32 ++++++------
 3 files changed, 47 insertions(+), 49 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 8e400bc63b785..d9e88b283a749 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -1659,8 +1659,6 @@ let Predicates = [HasVSX, HasP9Vector] in {
                         RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
 
   // Vector Splat Immediate Byte
-  // FIXME: Setting the hasSideEffects flag here to match current behaviour.
-  let hasSideEffects = 1 in
   def XXSPLTIB : X_RD6_IMM8<60, 360, (outs vsrc:$XT), (ins u8imm:$IMM8),
                             "xxspltib $XT, $IMM8", IIC_VecPerm, []>;
 
diff --git a/llvm/test/CodeGen/PowerPC/licm-xxsplti.ll b/llvm/test/CodeGen/PowerPC/licm-xxsplti.ll
index 1cf37e4192e1b..c6e6737211c2e 100644
--- a/llvm/test/CodeGen/PowerPC/licm-xxsplti.ll
+++ b/llvm/test/CodeGen/PowerPC/licm-xxsplti.ll
@@ -82,6 +82,7 @@ for.body:
 ; AIX32-NEXT:   li 6, 0
 ; AIX32-NEXT:   beq     0, L..BB0_4
 ; AIX32-NEXT: # %bb.2:                                # %for.body.preheader.new
+; AIX32-NEXT:   xxspltib 0, 6
 ; AIX32-NEXT:   addi 12, 4, -8
 ; AIX32-NEXT:   addi 9, 3, -8
 ; AIX32-NEXT:   rlwinm 7, 5, 0, 1, 30
@@ -91,20 +92,19 @@ for.body:
 ; AIX32-NEXT:   .align  4
 ; AIX32-NEXT: L..BB0_3:                               # %for.body
 ; AIX32-NEXT:                                         # =>This Inner Loop Header: Depth=1
-; AIX32-NEXT:   lxvwsx 0, 12, 10
-; AIX32-NEXT:   xxspltib 1, 6
-; AIX32-NEXT:   lxvwsx 2, 12, 11
+; AIX32-NEXT:   lxvwsx 1, 12, 10
 ; AIX32-NEXT:   addic 6, 6, 2
-; AIX32-NEXT:   addi 12, 12, 8
 ; AIX32-NEXT:   addze 8, 8
 ; AIX32-NEXT:   xor 0, 6, 7
 ; AIX32-NEXT:   or. 0, 0, 8
-; AIX32-NEXT:   xxland 0, 0, 1
-; AIX32-NEXT:   xxland 1, 2, 1
-; AIX32-NEXT:   xscvspdpn 0, 0
-; AIX32-NEXT:   stfsu 0, 8(9)
-; AIX32-NEXT:   xscvspdpn 0, 1
-; AIX32-NEXT:   stfs 0, 4(9)
+; AIX32-NEXT:   xxland 1, 1, 0
+; AIX32-NEXT:   xscvspdpn 1, 1
+; AIX32-NEXT:   stfsu 1, 8(9)
+; AIX32-NEXT:   lxvwsx 1, 12, 11
+; AIX32-NEXT:   addi 12, 12, 8
+; AIX32-NEXT:   xxland 1, 1, 0
+; AIX32-NEXT:   xscvspdpn 1, 1
+; AIX32-NEXT:   stfs 1, 4(9)
 ; AIX32-NEXT:   bne     0, L..BB0_3
 
 ; AIX64:      ._Z3fooPfS_Pi:
@@ -118,10 +118,11 @@ for.body:
 ; AIX64-NEXT:   beq     0, L..BB0_4
 ; AIX64-NEXT: # %bb.2:                                # %for.body.preheader.new
 ; AIX64-NEXT:   rlwinm 6, 5, 0, 1, 30
-; AIX64-NEXT:   addi 10, 4, -8
+; AIX64-NEXT:   xxspltib 0, 6
+; AIX64-NEXT:   addi 9, 4, -8
 ; AIX64-NEXT:   addi 7, 3, -8
 ; AIX64-NEXT:   li 8, 8
-; AIX64-NEXT:   li 9, 12
+; AIX64-NEXT:   li 10, 12
 ; AIX64-NEXT:   li 11, 4
 ; AIX64-NEXT:   addi 6, 6, -2
 ; AIX64-NEXT:   rldicl 6, 6, 63, 1
@@ -131,17 +132,16 @@ for.body:
 ; AIX64-NEXT:   .align  4
 ; AIX64-NEXT: L..BB0_3:                               # %for.body
 ; AIX64-NEXT:                                         # =>This Inner Loop Header: Depth=1
-; AIX64-NEXT:   lxvwsx 0, 10, 8
-; AIX64-NEXT:   xxspltib 1, 6
+; AIX64-NEXT:   lxvwsx 1, 9, 8
 ; AIX64-NEXT:   addi 6, 6, 2
-; AIX64-NEXT:   xxland 0, 0, 1
-; AIX64-NEXT:   xscvspdpn 0, 0
-; AIX64-NEXT:   stfsu 0, 8(7)
-; AIX64-NEXT:   lxvwsx 0, 10, 9
-; AIX64-NEXT:   addi 10, 10, 8
-; AIX64-NEXT:   xxland 0, 0, 1
-; AIX64-NEXT:   xxsldwi 0, 0, 0, 3
-; AIX64-NEXT:   stfiwx 0, 7, 11
+; AIX64-NEXT:   xxland 1, 1, 0
+; AIX64-NEXT:   xscvspdpn 1, 1
+; AIX64-NEXT:   stfsu 1, 8(7)
+; AIX64-NEXT:   lxvwsx 1, 9, 10
+; AIX64-NEXT:   addi 9, 9, 8
+; AIX64-NEXT:   xxland 1, 1, 0
+; AIX64-NEXT:   xxsldwi 1, 1, 1, 3
+; AIX64-NEXT:   stfiwx 1, 7, 11
 ; AIX64-NEXT:   bdnz L..BB0_3
 
 ; LINUX64LE:      _Z3fooPfS_Pi:                           # @_Z3fooPfS_Pi
@@ -157,6 +157,7 @@ for.body:
 ; LINUX64LE-NEXT:       beq     0, .LBB0_4
 ; LINUX64LE-NEXT: # %bb.2:                                # %for.body.preheader.new
 ; LINUX64LE-NEXT:       rlwinm 6, 5, 0, 1, 30
+; LINUX64LE-NEXT:       xxspltib 0, 6
 ; LINUX64LE-NEXT:       addi 8, 4, -8
 ; LINUX64LE-NEXT:       addi 7, 3, -8
 ; LINUX64LE-NEXT:       li 9, 8
@@ -170,15 +171,14 @@ for.body:
 ; LINUX64LE-NEXT:       .p2align        4
 ; LINUX64LE-NEXT: .LBB0_3:                                # %for.body
 ; LINUX64LE-NEXT:                                         # =>This Inner Loop Header: Depth=1
-; LINUX64LE-NEXT:       lxvwsx 0, 8, 9
-; LINUX64LE-NEXT:       xxspltib 1, 6
+; LINUX64LE-NEXT:       lxvwsx 1, 8, 9
 ; LINUX64LE-NEXT:       addi 6, 6, 2
-; LINUX64LE-NEXT:       xxland 0, 0, 1
-; LINUX64LE-NEXT:       xxsldwi 0, 0, 0, 3
-; LINUX64LE-NEXT:       xscvspdpn 0, 0
-; LINUX64LE-NEXT:       stfsu 0, 8(7)
-; LINUX64LE-NEXT:       lxvwsx 0, 8, 10
+; LINUX64LE-NEXT:       xxland 1, 1, 0
+; LINUX64LE-NEXT:       xxsldwi 1, 1, 1, 3
+; LINUX64LE-NEXT:       xscvspdpn 1, 1
+; LINUX64LE-NEXT:       stfsu 1, 8(7)
+; LINUX64LE-NEXT:       lxvwsx 1, 8, 10
 ; LINUX64LE-NEXT:       addi 8, 8, 8
-; LINUX64LE-NEXT:       xxland 0, 0, 1
-; LINUX64LE-NEXT:       stxvrwx 0, 7, 11
+; LINUX64LE-NEXT:       xxland 1, 1, 0
+; LINUX64LE-NEXT:       stxvrwx 1, 7, 11
 ; LINUX64LE-NEXT:       bdnz .LBB0_3
diff --git a/llvm/test/CodeGen/PowerPC/memset-tail.ll b/llvm/test/CodeGen/PowerPC/memset-tail.ll
index 31c136d009ba5..eeca550fc93c2 100644
--- a/llvm/test/CodeGen/PowerPC/memset-tail.ll
+++ b/llvm/test/CodeGen/PowerPC/memset-tail.ll
@@ -380,17 +380,17 @@ define dso_local void @memsetTailV1B2(ptr nocapture noundef writeonly %p) local_
 ;
 ; P9-BE-LABEL: memsetTailV1B2:
 ; P9-BE:       # %bb.0: # %entry
+; P9-BE-NEXT:    xxspltib 0, 15
 ; P9-BE-NEXT:    li 4, 3855
 ; P9-BE-NEXT:    sth 4, 16(3)
-; P9-BE-NEXT:    xxspltib 0, 15
 ; P9-BE-NEXT:    stxv 0, 0(3)
 ; P9-BE-NEXT:    blr
 ;
 ; P10-BE-LABEL: memsetTailV1B2:
 ; P10-BE:       # %bb.0: # %entry
+; P10-BE-NEXT:    xxspltib 0, 15
 ; P10-BE-NEXT:    li 4, 3855
 ; P10-BE-NEXT:    sth 4, 16(3)
-; P10-BE-NEXT:    xxspltib 0, 15
 ; P10-BE-NEXT:    stxv 0, 0(3)
 ; P10-BE-NEXT:    blr
 ;
@@ -404,17 +404,17 @@ define dso_local void @memsetTailV1B2(ptr nocapture noundef writeonly %p) local_
 ;
 ; P9-LE-LABEL: memsetTailV1B2:
 ; P9-LE:       # %bb.0: # %entry
+; P9-LE-NEXT:    xxspltib 0, 15
 ; P9-LE-NEXT:    li 4, 3855
 ; P9-LE-NEXT:    sth 4, 16(3)
-; P9-LE-NEXT:    xxspltib 0, 15
 ; P9-LE-NEXT:    stxv 0, 0(3)
 ; P9-LE-NEXT:    blr
 ;
 ; P10-LE-LABEL: memsetTailV1B2:
 ; P10-LE:       # %bb.0: # %entry
+; P10-LE-NEXT:    xxspltib 0, 15
 ; P10-LE-NEXT:    li 4, 3855
 ; P10-LE-NEXT:    sth 4, 16(3)
-; P10-LE-NEXT:    xxspltib 0, 15
 ; P10-LE-NEXT:    stxv 0, 0(3)
 ; P10-LE-NEXT:    blr
 entry:
@@ -433,17 +433,17 @@ define dso_local void @memsetTailV1B1(ptr nocapture noundef writeonly %p) local_
 ;
 ; P9-BE-LABEL: memsetTailV1B1:
 ; P9-BE:       # %bb.0: # %entry
+; P9-BE-NEXT:    xxspltib 0, 15
 ; P9-BE-NEXT:    li 4, 15
 ; P9-BE-NEXT:    stb 4, 16(3)
-; P9-BE-NEXT:    xxspltib 0, 15
 ; P9-BE-NEXT:    stxv 0, 0(3)
 ; P9-BE-NEXT:    blr
 ;
 ; P10-BE-LABEL: memsetTailV1B1:
 ; P10-BE:       # %bb.0: # %entry
+; P10-BE-NEXT:    xxspltib 0, 15
 ; P10-BE-NEXT:    li 4, 15
 ; P10-BE-NEXT:    stb 4, 16(3)
-; P10-BE-NEXT:    xxspltib 0, 15
 ; P10-BE-NEXT:    stxv 0, 0(3)
 ; P10-BE-NEXT:    blr
 ;
@@ -457,17 +457,17 @@ define dso_local void @memsetTailV1B1(ptr nocapture noundef writeonly %p) local_
 ;
 ; P9-LE-LABEL: memsetTailV1B1:
 ; P9-LE:       # %bb.0: # %entry
+; P9-LE-NEXT:    xxspltib 0, 15
 ; P9-LE-NEXT:    li 4, 15
 ; P9-LE-NEXT:    stb 4, 16(3)
-; P9-LE-NEXT:    xxspltib 0, 15
 ; P9-LE-NEXT:    stxv 0, 0(3)
 ; P9-LE-NEXT:    blr
 ;
 ; P10-LE-LABEL: memsetTailV1B1:
 ; P10-LE:       # %bb.0: # %entry
+; P10-LE-NEXT:    xxspltib 0, 15
 ; P10-LE-NEXT:    li 4, 15
 ; P10-LE-NEXT:    stb 4, 16(3)
-; P10-LE-NEXT:    xxspltib 0, 15
 ; P10-LE-NEXT:    stxv 0, 0(3)
 ; P10-LE-NEXT:    blr
 entry:
@@ -861,17 +861,17 @@ define dso_local void @memset2TailV1B2(ptr nocapture noundef writeonly %p) local
 ;
 ; P9-BE-LABEL: memset2TailV1B2:
 ; P9-BE:       # %bb.0: # %entry
+; P9-BE-NEXT:    xxspltib 0, 165
 ; P9-BE-NEXT:    li 4, -23131
 ; P9-BE-NEXT:    sth 4, 16(3)
-; P9-BE-NEXT:    xxspltib 0, 165
 ; P9-BE-NEXT:    stxv 0, 0(3)
 ; P9-BE-NEXT:    blr
 ;
 ; P10-BE-LABEL: memset2TailV1B2:
 ; P10-BE:       # %bb.0: # %entry
+; P10-BE-NEXT:    xxspltib 0, 165
 ; P10-BE-NEXT:    li 4, -23131
 ; P10-BE-NEXT:    sth 4, 16(3)
-; P10-BE-NEXT:    xxspltib 0, 165
 ; P10-BE-NEXT:    stxv 0, 0(3)
 ; P10-BE-NEXT:    blr
 ;
@@ -887,17 +887,17 @@ define dso_local void @memset2TailV1B2(ptr nocapture noundef writeonly %p) local
 ;
 ; P9-LE-LABEL: memset2TailV1B2:
 ; P9-LE:       # %bb.0: # %entry
+; P9-LE-NEXT:    xxspltib 0, 165
 ; P9-LE-NEXT:    li 4, -23131
 ; P9-LE-NEXT:    sth 4, 16(3)
-; P9-LE-NEXT:    xxspltib 0, 165
 ; P9-LE-NEXT:    stxv 0, 0(3)
 ; P9-LE-NEXT:    blr
 ;
 ; P10-LE-LABEL: memset2TailV1B2:
 ; P10-LE:       # %bb.0: # %entry
+; P10-LE-NEXT:    xxspltib 0, 165
 ; P10-LE-NEXT:    li 4, -23131
 ; P10-LE-NEXT:    sth 4, 16(3)
-; P10-LE-NEXT:    xxspltib 0, 165
 ; P10-LE-NEXT:    stxv 0, 0(3)
 ; P10-LE-NEXT:    blr
 entry:
@@ -917,17 +917,17 @@ define dso_local void @memset2TailV1B1(ptr nocapture noundef writeonly %p) local
 ;
 ; P9-BE-LABEL: memset2TailV1B1:
 ; P9-BE:       # %bb.0: # %entry
+; P9-BE-NEXT:    xxspltib 0, 165
 ; P9-BE-NEXT:    li 4, -91
 ; P9-BE-NEXT:    stb 4, 16(3)
-; P9-BE-NEXT:    xxspltib 0, 165
 ; P9-BE-NEXT:    stxv 0, 0(3)
 ; P9-BE-NEXT:    blr
 ;
 ; P10-BE-LABEL: memset2TailV1B1:
 ; P10-BE:       # %bb.0: # %entry
+; P10-BE-NEXT:    xxspltib 0, 165
 ; P10-BE-NEXT:    li 4, -91
 ; P10-BE-NEXT:    stb 4, 16(3)
-; P10-BE-NEXT:    xxspltib 0, 165
 ; P10-BE-NEXT:    stxv 0, 0(3)
 ; P10-BE-NEXT:    blr
 ;
@@ -943,17 +943,17 @@ define dso_local void @memset2TailV1B1(ptr nocapture noundef writeonly %p) local
 ;
 ; P9-LE-LABEL: memset2TailV1B1:
 ; P9-LE:       # %bb.0: # %entry
+; P9-LE-NEXT:    xxspltib 0, 165
 ; P9-LE-NEXT:    li 4, -91
 ; P9-LE-NEXT:    stb 4, 16(3)
-; P9-LE-NEXT:    xxspltib 0, 165
 ; P9-LE-NEXT:    stxv 0, 0(3)
 ; P9-LE-NEXT:    blr
 ;
 ; P10-LE-LABEL: memset2TailV1B1:
 ; P10-LE:       # %bb.0: # %entry
+; P10-LE-NEXT:    xxspltib 0, 165
 ; P10-LE-NEXT:    li 4, -91
 ; P10-LE-NEXT:    stb 4, 16(3)
-; P10-LE-NEXT:    xxspltib 0, 165
 ; P10-LE-NEXT:    stxv 0, 0(3)
 ; P10-LE-NEXT:    blr
 entry:



More information about the llvm-commits mailing list