[llvm] [PowerPC] xxspltib hoist out of loop (PR #127121)
zhijian lin via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 21 08:36:46 PST 2025
https://github.com/diggerlin updated https://github.com/llvm/llvm-project/pull/127121
From aedecc564adff9e09eb11f0035178882413afc6f Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Fri, 21 Feb 2025 16:52:50 +0000
Subject: [PATCH] [PowerPC] hoist xxspltib out of the loop
---
llvm/lib/Target/PowerPC/PPCInstrVSX.td | 2 -
llvm/test/CodeGen/PowerPC/licm-xxsplti.ll | 62 +++++++++++------------
llvm/test/CodeGen/PowerPC/memset-tail.ll | 32 ++++++------
3 files changed, 47 insertions(+), 49 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 8e400bc63b785..d9e88b283a749 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -1659,8 +1659,6 @@ let Predicates = [HasVSX, HasP9Vector] in {
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
// Vector Splat Immediate Byte
- // FIXME: Setting the hasSideEffects flag here to match current behaviour.
- let hasSideEffects = 1 in
def XXSPLTIB : X_RD6_IMM8<60, 360, (outs vsrc:$XT), (ins u8imm:$IMM8),
"xxspltib $XT, $IMM8", IIC_VecPerm, []>;
diff --git a/llvm/test/CodeGen/PowerPC/licm-xxsplti.ll b/llvm/test/CodeGen/PowerPC/licm-xxsplti.ll
index 1cf37e4192e1b..c6e6737211c2e 100644
--- a/llvm/test/CodeGen/PowerPC/licm-xxsplti.ll
+++ b/llvm/test/CodeGen/PowerPC/licm-xxsplti.ll
@@ -82,6 +82,7 @@ for.body:
; AIX32-NEXT: li 6, 0
; AIX32-NEXT: beq 0, L..BB0_4
; AIX32-NEXT: # %bb.2: # %for.body.preheader.new
+; AIX32-NEXT: xxspltib 0, 6
; AIX32-NEXT: addi 12, 4, -8
; AIX32-NEXT: addi 9, 3, -8
; AIX32-NEXT: rlwinm 7, 5, 0, 1, 30
@@ -91,20 +92,19 @@ for.body:
; AIX32-NEXT: .align 4
; AIX32-NEXT: L..BB0_3: # %for.body
; AIX32-NEXT: # =>This Inner Loop Header: Depth=1
-; AIX32-NEXT: lxvwsx 0, 12, 10
-; AIX32-NEXT: xxspltib 1, 6
-; AIX32-NEXT: lxvwsx 2, 12, 11
+; AIX32-NEXT: lxvwsx 1, 12, 10
; AIX32-NEXT: addic 6, 6, 2
-; AIX32-NEXT: addi 12, 12, 8
; AIX32-NEXT: addze 8, 8
; AIX32-NEXT: xor 0, 6, 7
; AIX32-NEXT: or. 0, 0, 8
-; AIX32-NEXT: xxland 0, 0, 1
-; AIX32-NEXT: xxland 1, 2, 1
-; AIX32-NEXT: xscvspdpn 0, 0
-; AIX32-NEXT: stfsu 0, 8(9)
-; AIX32-NEXT: xscvspdpn 0, 1
-; AIX32-NEXT: stfs 0, 4(9)
+; AIX32-NEXT: xxland 1, 1, 0
+; AIX32-NEXT: xscvspdpn 1, 1
+; AIX32-NEXT: stfsu 1, 8(9)
+; AIX32-NEXT: lxvwsx 1, 12, 11
+; AIX32-NEXT: addi 12, 12, 8
+; AIX32-NEXT: xxland 1, 1, 0
+; AIX32-NEXT: xscvspdpn 1, 1
+; AIX32-NEXT: stfs 1, 4(9)
; AIX32-NEXT: bne 0, L..BB0_3
; AIX64: ._Z3fooPfS_Pi:
@@ -118,10 +118,11 @@ for.body:
; AIX64-NEXT: beq 0, L..BB0_4
; AIX64-NEXT: # %bb.2: # %for.body.preheader.new
; AIX64-NEXT: rlwinm 6, 5, 0, 1, 30
-; AIX64-NEXT: addi 10, 4, -8
+; AIX64-NEXT: xxspltib 0, 6
+; AIX64-NEXT: addi 9, 4, -8
; AIX64-NEXT: addi 7, 3, -8
; AIX64-NEXT: li 8, 8
-; AIX64-NEXT: li 9, 12
+; AIX64-NEXT: li 10, 12
; AIX64-NEXT: li 11, 4
; AIX64-NEXT: addi 6, 6, -2
; AIX64-NEXT: rldicl 6, 6, 63, 1
@@ -131,17 +132,16 @@ for.body:
; AIX64-NEXT: .align 4
; AIX64-NEXT: L..BB0_3: # %for.body
; AIX64-NEXT: # =>This Inner Loop Header: Depth=1
-; AIX64-NEXT: lxvwsx 0, 10, 8
-; AIX64-NEXT: xxspltib 1, 6
+; AIX64-NEXT: lxvwsx 1, 9, 8
; AIX64-NEXT: addi 6, 6, 2
-; AIX64-NEXT: xxland 0, 0, 1
-; AIX64-NEXT: xscvspdpn 0, 0
-; AIX64-NEXT: stfsu 0, 8(7)
-; AIX64-NEXT: lxvwsx 0, 10, 9
-; AIX64-NEXT: addi 10, 10, 8
-; AIX64-NEXT: xxland 0, 0, 1
-; AIX64-NEXT: xxsldwi 0, 0, 0, 3
-; AIX64-NEXT: stfiwx 0, 7, 11
+; AIX64-NEXT: xxland 1, 1, 0
+; AIX64-NEXT: xscvspdpn 1, 1
+; AIX64-NEXT: stfsu 1, 8(7)
+; AIX64-NEXT: lxvwsx 1, 9, 10
+; AIX64-NEXT: addi 9, 9, 8
+; AIX64-NEXT: xxland 1, 1, 0
+; AIX64-NEXT: xxsldwi 1, 1, 1, 3
+; AIX64-NEXT: stfiwx 1, 7, 11
; AIX64-NEXT: bdnz L..BB0_3
; LINUX64LE: _Z3fooPfS_Pi: # @_Z3fooPfS_Pi
@@ -157,6 +157,7 @@ for.body:
; LINUX64LE-NEXT: beq 0, .LBB0_4
; LINUX64LE-NEXT: # %bb.2: # %for.body.preheader.new
; LINUX64LE-NEXT: rlwinm 6, 5, 0, 1, 30
+; LINUX64LE-NEXT: xxspltib 0, 6
; LINUX64LE-NEXT: addi 8, 4, -8
; LINUX64LE-NEXT: addi 7, 3, -8
; LINUX64LE-NEXT: li 9, 8
@@ -170,15 +171,14 @@ for.body:
; LINUX64LE-NEXT: .p2align 4
; LINUX64LE-NEXT: .LBB0_3: # %for.body
; LINUX64LE-NEXT: # =>This Inner Loop Header: Depth=1
-; LINUX64LE-NEXT: lxvwsx 0, 8, 9
-; LINUX64LE-NEXT: xxspltib 1, 6
+; LINUX64LE-NEXT: lxvwsx 1, 8, 9
; LINUX64LE-NEXT: addi 6, 6, 2
-; LINUX64LE-NEXT: xxland 0, 0, 1
-; LINUX64LE-NEXT: xxsldwi 0, 0, 0, 3
-; LINUX64LE-NEXT: xscvspdpn 0, 0
-; LINUX64LE-NEXT: stfsu 0, 8(7)
-; LINUX64LE-NEXT: lxvwsx 0, 8, 10
+; LINUX64LE-NEXT: xxland 1, 1, 0
+; LINUX64LE-NEXT: xxsldwi 1, 1, 1, 3
+; LINUX64LE-NEXT: xscvspdpn 1, 1
+; LINUX64LE-NEXT: stfsu 1, 8(7)
+; LINUX64LE-NEXT: lxvwsx 1, 8, 10
; LINUX64LE-NEXT: addi 8, 8, 8
-; LINUX64LE-NEXT: xxland 0, 0, 1
-; LINUX64LE-NEXT: stxvrwx 0, 7, 11
+; LINUX64LE-NEXT: xxland 1, 1, 0
+; LINUX64LE-NEXT: stxvrwx 1, 7, 11
; LINUX64LE-NEXT: bdnz .LBB0_3
diff --git a/llvm/test/CodeGen/PowerPC/memset-tail.ll b/llvm/test/CodeGen/PowerPC/memset-tail.ll
index 31c136d009ba5..eeca550fc93c2 100644
--- a/llvm/test/CodeGen/PowerPC/memset-tail.ll
+++ b/llvm/test/CodeGen/PowerPC/memset-tail.ll
@@ -380,17 +380,17 @@ define dso_local void @memsetTailV1B2(ptr nocapture noundef writeonly %p) local_
;
; P9-BE-LABEL: memsetTailV1B2:
; P9-BE: # %bb.0: # %entry
+; P9-BE-NEXT: xxspltib 0, 15
; P9-BE-NEXT: li 4, 3855
; P9-BE-NEXT: sth 4, 16(3)
-; P9-BE-NEXT: xxspltib 0, 15
; P9-BE-NEXT: stxv 0, 0(3)
; P9-BE-NEXT: blr
;
; P10-BE-LABEL: memsetTailV1B2:
; P10-BE: # %bb.0: # %entry
+; P10-BE-NEXT: xxspltib 0, 15
; P10-BE-NEXT: li 4, 3855
; P10-BE-NEXT: sth 4, 16(3)
-; P10-BE-NEXT: xxspltib 0, 15
; P10-BE-NEXT: stxv 0, 0(3)
; P10-BE-NEXT: blr
;
@@ -404,17 +404,17 @@ define dso_local void @memsetTailV1B2(ptr nocapture noundef writeonly %p) local_
;
; P9-LE-LABEL: memsetTailV1B2:
; P9-LE: # %bb.0: # %entry
+; P9-LE-NEXT: xxspltib 0, 15
; P9-LE-NEXT: li 4, 3855
; P9-LE-NEXT: sth 4, 16(3)
-; P9-LE-NEXT: xxspltib 0, 15
; P9-LE-NEXT: stxv 0, 0(3)
; P9-LE-NEXT: blr
;
; P10-LE-LABEL: memsetTailV1B2:
; P10-LE: # %bb.0: # %entry
+; P10-LE-NEXT: xxspltib 0, 15
; P10-LE-NEXT: li 4, 3855
; P10-LE-NEXT: sth 4, 16(3)
-; P10-LE-NEXT: xxspltib 0, 15
; P10-LE-NEXT: stxv 0, 0(3)
; P10-LE-NEXT: blr
entry:
@@ -433,17 +433,17 @@ define dso_local void @memsetTailV1B1(ptr nocapture noundef writeonly %p) local_
;
; P9-BE-LABEL: memsetTailV1B1:
; P9-BE: # %bb.0: # %entry
+; P9-BE-NEXT: xxspltib 0, 15
; P9-BE-NEXT: li 4, 15
; P9-BE-NEXT: stb 4, 16(3)
-; P9-BE-NEXT: xxspltib 0, 15
; P9-BE-NEXT: stxv 0, 0(3)
; P9-BE-NEXT: blr
;
; P10-BE-LABEL: memsetTailV1B1:
; P10-BE: # %bb.0: # %entry
+; P10-BE-NEXT: xxspltib 0, 15
; P10-BE-NEXT: li 4, 15
; P10-BE-NEXT: stb 4, 16(3)
-; P10-BE-NEXT: xxspltib 0, 15
; P10-BE-NEXT: stxv 0, 0(3)
; P10-BE-NEXT: blr
;
@@ -457,17 +457,17 @@ define dso_local void @memsetTailV1B1(ptr nocapture noundef writeonly %p) local_
;
; P9-LE-LABEL: memsetTailV1B1:
; P9-LE: # %bb.0: # %entry
+; P9-LE-NEXT: xxspltib 0, 15
; P9-LE-NEXT: li 4, 15
; P9-LE-NEXT: stb 4, 16(3)
-; P9-LE-NEXT: xxspltib 0, 15
; P9-LE-NEXT: stxv 0, 0(3)
; P9-LE-NEXT: blr
;
; P10-LE-LABEL: memsetTailV1B1:
; P10-LE: # %bb.0: # %entry
+; P10-LE-NEXT: xxspltib 0, 15
; P10-LE-NEXT: li 4, 15
; P10-LE-NEXT: stb 4, 16(3)
-; P10-LE-NEXT: xxspltib 0, 15
; P10-LE-NEXT: stxv 0, 0(3)
; P10-LE-NEXT: blr
entry:
@@ -861,17 +861,17 @@ define dso_local void @memset2TailV1B2(ptr nocapture noundef writeonly %p) local
;
; P9-BE-LABEL: memset2TailV1B2:
; P9-BE: # %bb.0: # %entry
+; P9-BE-NEXT: xxspltib 0, 165
; P9-BE-NEXT: li 4, -23131
; P9-BE-NEXT: sth 4, 16(3)
-; P9-BE-NEXT: xxspltib 0, 165
; P9-BE-NEXT: stxv 0, 0(3)
; P9-BE-NEXT: blr
;
; P10-BE-LABEL: memset2TailV1B2:
; P10-BE: # %bb.0: # %entry
+; P10-BE-NEXT: xxspltib 0, 165
; P10-BE-NEXT: li 4, -23131
; P10-BE-NEXT: sth 4, 16(3)
-; P10-BE-NEXT: xxspltib 0, 165
; P10-BE-NEXT: stxv 0, 0(3)
; P10-BE-NEXT: blr
;
@@ -887,17 +887,17 @@ define dso_local void @memset2TailV1B2(ptr nocapture noundef writeonly %p) local
;
; P9-LE-LABEL: memset2TailV1B2:
; P9-LE: # %bb.0: # %entry
+; P9-LE-NEXT: xxspltib 0, 165
; P9-LE-NEXT: li 4, -23131
; P9-LE-NEXT: sth 4, 16(3)
-; P9-LE-NEXT: xxspltib 0, 165
; P9-LE-NEXT: stxv 0, 0(3)
; P9-LE-NEXT: blr
;
; P10-LE-LABEL: memset2TailV1B2:
; P10-LE: # %bb.0: # %entry
+; P10-LE-NEXT: xxspltib 0, 165
; P10-LE-NEXT: li 4, -23131
; P10-LE-NEXT: sth 4, 16(3)
-; P10-LE-NEXT: xxspltib 0, 165
; P10-LE-NEXT: stxv 0, 0(3)
; P10-LE-NEXT: blr
entry:
@@ -917,17 +917,17 @@ define dso_local void @memset2TailV1B1(ptr nocapture noundef writeonly %p) local
;
; P9-BE-LABEL: memset2TailV1B1:
; P9-BE: # %bb.0: # %entry
+; P9-BE-NEXT: xxspltib 0, 165
; P9-BE-NEXT: li 4, -91
; P9-BE-NEXT: stb 4, 16(3)
-; P9-BE-NEXT: xxspltib 0, 165
; P9-BE-NEXT: stxv 0, 0(3)
; P9-BE-NEXT: blr
;
; P10-BE-LABEL: memset2TailV1B1:
; P10-BE: # %bb.0: # %entry
+; P10-BE-NEXT: xxspltib 0, 165
; P10-BE-NEXT: li 4, -91
; P10-BE-NEXT: stb 4, 16(3)
-; P10-BE-NEXT: xxspltib 0, 165
; P10-BE-NEXT: stxv 0, 0(3)
; P10-BE-NEXT: blr
;
@@ -943,17 +943,17 @@ define dso_local void @memset2TailV1B1(ptr nocapture noundef writeonly %p) local
;
; P9-LE-LABEL: memset2TailV1B1:
; P9-LE: # %bb.0: # %entry
+; P9-LE-NEXT: xxspltib 0, 165
; P9-LE-NEXT: li 4, -91
; P9-LE-NEXT: stb 4, 16(3)
-; P9-LE-NEXT: xxspltib 0, 165
; P9-LE-NEXT: stxv 0, 0(3)
; P9-LE-NEXT: blr
;
; P10-LE-LABEL: memset2TailV1B1:
; P10-LE: # %bb.0: # %entry
+; P10-LE-NEXT: xxspltib 0, 165
; P10-LE-NEXT: li 4, -91
; P10-LE-NEXT: stb 4, 16(3)
-; P10-LE-NEXT: xxspltib 0, 165
; P10-LE-NEXT: stxv 0, 0(3)
; P10-LE-NEXT: blr
entry:
More information about the llvm-commits mailing list