[llvm] [NFC] NFC patch to lockdown instructions of vspltisw for addition of vector of 1s (PR #160476)

Wed Sep 24 02:35:27 PDT 2025

llvmbot wrote:




@llvm/pr-subscribers-backend-powerpc

Author: None (Himadhith)

<details>
<summary>Changes</summary>

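This NFC patch locks down the instructions currently generated for the operation `A + vector {1, 1, 1, 1}`, for which the current code emits `vspltisw` to materialize the vector of 1s.
This can be improved in a future patch by using the `2 cycle` instruction `xxleqv` to generate a vector of -1s and subtracting it from `A`, instead of using the current `4 cycle` instruction `vspltisw` followed by an add.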

---
Full diff: https://github.com/llvm/llvm-project/pull/160476.diff


1 File Affected:

- (added) llvm/test/CodeGen/PowerPC/vector-all-ones.ll (+47) 


``````````diff

diff --git a/llvm/test/CodeGen/PowerPC/vector-all-ones.ll b/llvm/test/CodeGen/PowerPC/vector-all-ones.ll
new file mode 100644
index 0000000000000..7ad41482ffe81
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vector-all-ones.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=POWERPC_64LE
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64-ibm-aix \
+; RUN:     -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=POWERPC_64
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc-ibm-aix \
+; RUN:     -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=POWERPC_32
+
+; Currently the generated code uses `vspltisw` to generate vector of 1s followed by add operation.
+; This pattern is expected to be optimized in a future patch by using `xxleqv` to generate vector of -1s
+; followed by subtraction operation.
+define dso_local <4 x i32> @test1(<4 x i32>  %a) {
+; POWERPC_64LE-LABEL: test1:
+; POWERPC_64LE:       # %bb.0: # %entry
+; POWERPC_64LE-NEXT:    vspltisw v3, 1
+; POWERPC_64LE-NEXT:    stxv v2, -16(r1)
+; POWERPC_64LE-NEXT:    vadduwm v2, v2, v3
+; POWERPC_64LE-NEXT:    stxv v3, -32(r1)
+; POWERPC_64LE-NEXT:    blr
+;
+; POWERPC_64-LABEL: test1:
+; POWERPC_64:       # %bb.0: # %entry
+; POWERPC_64-NEXT:    vspltisw v3, 1
+; POWERPC_64-NEXT:    stxv v2, -16(r1)
+; POWERPC_64-NEXT:    vadduwm v2, v2, v3
+; POWERPC_64-NEXT:    stxv v3, -32(r1)
+; POWERPC_64-NEXT:    blr
+;
+; POWERPC_32-LABEL: test1:
+; POWERPC_32:       # %bb.0: # %entry
+; POWERPC_32-NEXT:    vspltisw v3, 1
+; POWERPC_32-NEXT:    stxv v2, -16(r1)
+; POWERPC_32-NEXT:    vadduwm v2, v2, v3
+; POWERPC_32-NEXT:    stxv v3, -32(r1)
+; POWERPC_32-NEXT:    blr
+entry:
+  %a.addr = alloca <4 x i32>, align 16
+  %b = alloca <4 x i32>, align 16
+  store <4 x i32> %a, ptr %a.addr, align 16
+  store <4 x i32> splat (i32 1), ptr %b, align 16
+  %0 = load <4 x i32>, ptr %a.addr, align 16
+  %1 = load <4 x i32>, ptr %b, align 16
+  %add = add <4 x i32> %0, %1
+  ret <4 x i32> %add
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/160476