[llvm] [NFC][PowerPC] Pre-commit to optimize bswap64 builtin for power8 (PR #181776)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 17 02:06:10 PDT 2026
https://github.com/Himadhith updated https://github.com/llvm/llvm-project/pull/181776
>From dec42e840748228af503a005be3c11b4d03f6b98 Mon Sep 17 00:00:00 2001
From: himadhith <himadhith.v at ibm.com>
Date: Tue, 17 Feb 2026 00:48:32 -0500
Subject: [PATCH 1/2] [NFC][PowerPC] Pre-commit to optimize bswap64 builtin for
power8
---
.../CodeGen/PowerPC/p8-optimize-bswap64.ll | 24 +++++++++++++++++++
1 file changed, 24 insertions(+)
create mode 100644 llvm/test/CodeGen/PowerPC/p8-optimize-bswap64.ll
diff --git a/llvm/test/CodeGen/PowerPC/p8-optimize-bswap64.ll b/llvm/test/CodeGen/PowerPC/p8-optimize-bswap64.ll
new file mode 100644
index 0000000000000..135cf3b3a5e93
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/p8-optimize-bswap64.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-ibm-aix \
+; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s
+define noundef i64 @swap8_openxl(i64 noundef %value) local_unnamed_addr #0 {
+; CHECK-LABEL: swap8_openxl:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: rotldi r5, r3, 16
+; CHECK-NEXT: rotldi r4, r3, 8
+; CHECK-NEXT: rldimi r4, r5, 8, 48
+; CHECK-NEXT: rotldi r5, r3, 24
+; CHECK-NEXT: rldimi r4, r5, 16, 40
+; CHECK-NEXT: rotldi r5, r3, 32
+; CHECK-NEXT: rldimi r4, r5, 24, 32
+; CHECK-NEXT: rotldi r5, r3, 48
+; CHECK-NEXT: rldimi r4, r5, 40, 16
+; CHECK-NEXT: rotldi r5, r3, 56
+; CHECK-NEXT: rldimi r4, r5, 48, 8
+; CHECK-NEXT: rldimi r4, r3, 56, 0
+; CHECK-NEXT: mr r3, r4
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call i64 @llvm.bswap.i64(i64 %value)
+ ret i64 %0
+}
>From f52c6fbd2da7dcb8da96fa74cd3ba420842b73ec Mon Sep 17 00:00:00 2001
From: himadhith <himadhith.v at ibm.com>
Date: Tue, 17 Feb 2026 01:52:01 -0500
Subject: [PATCH 2/2] PR review changes
---
llvm/test/CodeGen/PowerPC/bswap64.ll | 41 +++++++++++++++++++
.../CodeGen/PowerPC/p8-optimize-bswap64.ll | 24 -----------
2 files changed, 41 insertions(+), 24 deletions(-)
delete mode 100644 llvm/test/CodeGen/PowerPC/p8-optimize-bswap64.ll
diff --git a/llvm/test/CodeGen/PowerPC/bswap64.ll b/llvm/test/CodeGen/PowerPC/bswap64.ll
index ef3cd4aa72cab..710f8e464c303 100644
--- a/llvm/test/CodeGen/PowerPC/bswap64.ll
+++ b/llvm/test/CodeGen/PowerPC/bswap64.ll
@@ -1,5 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-unknown \
+; RUN: -mcpu=pwr8 | FileCheck %s --check-prefix=POWER-8
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-unknown \
+; RUN: -mcpu=pwr8 -mattr=-altivec | FileCheck %s --check-prefix=POWER-8-NO-ALTIVEC
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-unknown \
; RUN: -mcpu=pwr9 | FileCheck %s
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-ibm-aix-xcoff \
; RUN: -mcpu=pwr9 -vec-extabi | FileCheck %s
@@ -10,7 +14,44 @@
declare i64 @llvm.bswap.i64(i64)
+; For now, both sets of instructions for P8 are unoptimized versions.
+; A future patch will leverage parallelism to improve
+; efficiency and performance.
define i64 @bswap64(i64 %x) {
+; POWER-8-LABEL: bswap64:
+; POWER-8: # %bb.0: # %entry
+; POWER-8-NEXT: rotldi 5, 3, 16
+; POWER-8-NEXT: rotldi 4, 3, 8
+; POWER-8-NEXT: rldimi 4, 5, 8, 48
+; POWER-8-NEXT: rotldi 5, 3, 24
+; POWER-8-NEXT: rldimi 4, 5, 16, 40
+; POWER-8-NEXT: rotldi 5, 3, 32
+; POWER-8-NEXT: rldimi 4, 5, 24, 32
+; POWER-8-NEXT: rotldi 5, 3, 48
+; POWER-8-NEXT: rldimi 4, 5, 40, 16
+; POWER-8-NEXT: rotldi 5, 3, 56
+; POWER-8-NEXT: rldimi 4, 5, 48, 8
+; POWER-8-NEXT: rldimi 4, 3, 56, 0
+; POWER-8-NEXT: mr 3, 4
+; POWER-8-NEXT: blr
+;
+; POWER-8-NO-ALTIVEC-LABEL: bswap64:
+; POWER-8-NO-ALTIVEC: # %bb.0: # %entry
+; POWER-8-NO-ALTIVEC-NEXT: rotldi 5, 3, 16
+; POWER-8-NO-ALTIVEC-NEXT: rotldi 4, 3, 8
+; POWER-8-NO-ALTIVEC-NEXT: rldimi 4, 5, 8, 48
+; POWER-8-NO-ALTIVEC-NEXT: rotldi 5, 3, 24
+; POWER-8-NO-ALTIVEC-NEXT: rldimi 4, 5, 16, 40
+; POWER-8-NO-ALTIVEC-NEXT: rotldi 5, 3, 32
+; POWER-8-NO-ALTIVEC-NEXT: rldimi 4, 5, 24, 32
+; POWER-8-NO-ALTIVEC-NEXT: rotldi 5, 3, 48
+; POWER-8-NO-ALTIVEC-NEXT: rldimi 4, 5, 40, 16
+; POWER-8-NO-ALTIVEC-NEXT: rotldi 5, 3, 56
+; POWER-8-NO-ALTIVEC-NEXT: rldimi 4, 5, 48, 8
+; POWER-8-NO-ALTIVEC-NEXT: rldimi 4, 3, 56, 0
+; POWER-8-NO-ALTIVEC-NEXT: mr 3, 4
+; POWER-8-NO-ALTIVEC-NEXT: blr
+;
; CHECK-LABEL: bswap64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mtvsrdd 34, 3, 3
diff --git a/llvm/test/CodeGen/PowerPC/p8-optimize-bswap64.ll b/llvm/test/CodeGen/PowerPC/p8-optimize-bswap64.ll
deleted file mode 100644
index 135cf3b3a5e93..0000000000000
--- a/llvm/test/CodeGen/PowerPC/p8-optimize-bswap64.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-ibm-aix \
-; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s
-define noundef i64 @swap8_openxl(i64 noundef %value) local_unnamed_addr #0 {
-; CHECK-LABEL: swap8_openxl:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: rotldi r5, r3, 16
-; CHECK-NEXT: rotldi r4, r3, 8
-; CHECK-NEXT: rldimi r4, r5, 8, 48
-; CHECK-NEXT: rotldi r5, r3, 24
-; CHECK-NEXT: rldimi r4, r5, 16, 40
-; CHECK-NEXT: rotldi r5, r3, 32
-; CHECK-NEXT: rldimi r4, r5, 24, 32
-; CHECK-NEXT: rotldi r5, r3, 48
-; CHECK-NEXT: rldimi r4, r5, 40, 16
-; CHECK-NEXT: rotldi r5, r3, 56
-; CHECK-NEXT: rldimi r4, r5, 48, 8
-; CHECK-NEXT: rldimi r4, r3, 56, 0
-; CHECK-NEXT: mr r3, r4
-; CHECK-NEXT: blr
-entry:
- %0 = tail call i64 @llvm.bswap.i64(i64 %value)
- ret i64 %0
-}
More information about the llvm-commits mailing list