[llvm] [NFC][PowerPC] Pre-commit to optimize bswap64 builtin for power8 (PR #181776)

via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 17 02:06:10 PDT 2026


https://github.com/Himadhith updated https://github.com/llvm/llvm-project/pull/181776

>From dec42e840748228af503a005be3c11b4d03f6b98 Mon Sep 17 00:00:00 2001
From: himadhith <himadhith.v at ibm.com>
Date: Tue, 17 Feb 2026 00:48:32 -0500
Subject: [PATCH 1/2] [NFC][PowerPC] Pre-commit to optimize bswap64 builtin for
 power8

---
 .../CodeGen/PowerPC/p8-optimize-bswap64.ll    | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 llvm/test/CodeGen/PowerPC/p8-optimize-bswap64.ll

diff --git a/llvm/test/CodeGen/PowerPC/p8-optimize-bswap64.ll b/llvm/test/CodeGen/PowerPC/p8-optimize-bswap64.ll
new file mode 100644
index 0000000000000..135cf3b3a5e93
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/p8-optimize-bswap64.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-ibm-aix \
+; RUN:     -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s
+define noundef i64 @swap8_openxl(i64 noundef %value) local_unnamed_addr #0 {
+; CHECK-LABEL: swap8_openxl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rotldi r5, r3, 16
+; CHECK-NEXT:    rotldi r4, r3, 8
+; CHECK-NEXT:    rldimi r4, r5, 8, 48
+; CHECK-NEXT:    rotldi r5, r3, 24
+; CHECK-NEXT:    rldimi r4, r5, 16, 40
+; CHECK-NEXT:    rotldi r5, r3, 32
+; CHECK-NEXT:    rldimi r4, r5, 24, 32
+; CHECK-NEXT:    rotldi r5, r3, 48
+; CHECK-NEXT:    rldimi r4, r5, 40, 16
+; CHECK-NEXT:    rotldi r5, r3, 56
+; CHECK-NEXT:    rldimi r4, r5, 48, 8
+; CHECK-NEXT:    rldimi r4, r3, 56, 0
+; CHECK-NEXT:    mr r3, r4
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call i64 @llvm.bswap.i64(i64 %value)
+  ret i64 %0
+}

>From f52c6fbd2da7dcb8da96fa74cd3ba420842b73ec Mon Sep 17 00:00:00 2001
From: himadhith <himadhith.v at ibm.com>
Date: Tue, 17 Feb 2026 01:52:01 -0500
Subject: [PATCH 2/2] PR review changes

---
 llvm/test/CodeGen/PowerPC/bswap64.ll          | 41 +++++++++++++++++++
 .../CodeGen/PowerPC/p8-optimize-bswap64.ll    | 24 -----------
 2 files changed, 41 insertions(+), 24 deletions(-)
 delete mode 100644 llvm/test/CodeGen/PowerPC/p8-optimize-bswap64.ll

diff --git a/llvm/test/CodeGen/PowerPC/bswap64.ll b/llvm/test/CodeGen/PowerPC/bswap64.ll
index ef3cd4aa72cab..710f8e464c303 100644
--- a/llvm/test/CodeGen/PowerPC/bswap64.ll
+++ b/llvm/test/CodeGen/PowerPC/bswap64.ll
@@ -1,5 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-unknown \
+; RUN:   -mcpu=pwr8 | FileCheck %s --check-prefix=POWER-8
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-unknown \
+; RUN:   -mcpu=pwr8 -mattr=-altivec | FileCheck %s --check-prefix=POWER-8-NO-ALTIVEC
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-unknown \
 ; RUN:   -mcpu=pwr9 | FileCheck %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-ibm-aix-xcoff \
 ; RUN:   -mcpu=pwr9 -vec-extabi | FileCheck %s
@@ -10,7 +14,44 @@
 
 declare i64 @llvm.bswap.i64(i64)
 
+; For now, both sets of instructions for P8 are unoptimized.
+; A future patch will leverage parallelism to improve
+; efficiency and performance.
 define i64 @bswap64(i64 %x) {
+; POWER-8-LABEL: bswap64:
+; POWER-8:       # %bb.0: # %entry
+; POWER-8-NEXT:    rotldi 5, 3, 16
+; POWER-8-NEXT:    rotldi 4, 3, 8
+; POWER-8-NEXT:    rldimi 4, 5, 8, 48
+; POWER-8-NEXT:    rotldi 5, 3, 24
+; POWER-8-NEXT:    rldimi 4, 5, 16, 40
+; POWER-8-NEXT:    rotldi 5, 3, 32
+; POWER-8-NEXT:    rldimi 4, 5, 24, 32
+; POWER-8-NEXT:    rotldi 5, 3, 48
+; POWER-8-NEXT:    rldimi 4, 5, 40, 16
+; POWER-8-NEXT:    rotldi 5, 3, 56
+; POWER-8-NEXT:    rldimi 4, 5, 48, 8
+; POWER-8-NEXT:    rldimi 4, 3, 56, 0
+; POWER-8-NEXT:    mr 3, 4
+; POWER-8-NEXT:    blr
+;
+; POWER-8-NO-ALTIVEC-LABEL: bswap64:
+; POWER-8-NO-ALTIVEC:       # %bb.0: # %entry
+; POWER-8-NO-ALTIVEC-NEXT:    rotldi 5, 3, 16
+; POWER-8-NO-ALTIVEC-NEXT:    rotldi 4, 3, 8
+; POWER-8-NO-ALTIVEC-NEXT:    rldimi 4, 5, 8, 48
+; POWER-8-NO-ALTIVEC-NEXT:    rotldi 5, 3, 24
+; POWER-8-NO-ALTIVEC-NEXT:    rldimi 4, 5, 16, 40
+; POWER-8-NO-ALTIVEC-NEXT:    rotldi 5, 3, 32
+; POWER-8-NO-ALTIVEC-NEXT:    rldimi 4, 5, 24, 32
+; POWER-8-NO-ALTIVEC-NEXT:    rotldi 5, 3, 48
+; POWER-8-NO-ALTIVEC-NEXT:    rldimi 4, 5, 40, 16
+; POWER-8-NO-ALTIVEC-NEXT:    rotldi 5, 3, 56
+; POWER-8-NO-ALTIVEC-NEXT:    rldimi 4, 5, 48, 8
+; POWER-8-NO-ALTIVEC-NEXT:    rldimi 4, 3, 56, 0
+; POWER-8-NO-ALTIVEC-NEXT:    mr 3, 4
+; POWER-8-NO-ALTIVEC-NEXT:    blr
+;
 ; CHECK-LABEL: bswap64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mtvsrdd 34, 3, 3
diff --git a/llvm/test/CodeGen/PowerPC/p8-optimize-bswap64.ll b/llvm/test/CodeGen/PowerPC/p8-optimize-bswap64.ll
deleted file mode 100644
index 135cf3b3a5e93..0000000000000
--- a/llvm/test/CodeGen/PowerPC/p8-optimize-bswap64.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-ibm-aix \
-; RUN:     -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s
-define noundef i64 @swap8_openxl(i64 noundef %value) local_unnamed_addr #0 {
-; CHECK-LABEL: swap8_openxl:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    rotldi r5, r3, 16
-; CHECK-NEXT:    rotldi r4, r3, 8
-; CHECK-NEXT:    rldimi r4, r5, 8, 48
-; CHECK-NEXT:    rotldi r5, r3, 24
-; CHECK-NEXT:    rldimi r4, r5, 16, 40
-; CHECK-NEXT:    rotldi r5, r3, 32
-; CHECK-NEXT:    rldimi r4, r5, 24, 32
-; CHECK-NEXT:    rotldi r5, r3, 48
-; CHECK-NEXT:    rldimi r4, r5, 40, 16
-; CHECK-NEXT:    rotldi r5, r3, 56
-; CHECK-NEXT:    rldimi r4, r5, 48, 8
-; CHECK-NEXT:    rldimi r4, r3, 56, 0
-; CHECK-NEXT:    mr r3, r4
-; CHECK-NEXT:    blr
-entry:
-  %0 = tail call i64 @llvm.bswap.i64(i64 %value)
-  ret i64 %0
-}



More information about the llvm-commits mailing list