[llvm] [DAGCombine] Improve bswap lowering for machines that support bit rotates (PR #164848)
    via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Thu Oct 23 10:36:18 PDT 2025
    
    
  
https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/164848
>From 2cee8e29245d631b3a5f9df8ca7fbccdd7781bbd Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Thu, 23 Oct 2025 12:54:48 -0400
Subject: [PATCH] [DAGCombine] Improve bswap lowering for machines that support
 bit rotates
Source: Hacker's Delight.
---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   | 12 +++
 .../CodeGen/ARM/load-combine-big-endian.ll    | 93 ++++++++-----------
 llvm/test/CodeGen/ARM/load-combine.ll         | 69 ++++++--------
 3 files changed, 78 insertions(+), 96 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 920dff935daed..3accdb722f911 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -9899,6 +9899,18 @@ SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
     // Use a rotate by 8. This can be further expanded if necessary.
     return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
   case MVT::i32:
+    if (isOperationLegal(ISD::ROTR, VT)) {
+      // (x & 0x00FF00FF) rotl 8 | (x rotr 8) & 0x00FF00FF
+      SDValue And = DAG.getNode(ISD::AND, dl, VT, Op,
+                                DAG.getConstant(0x00FF00FF, dl, VT));
+      SDValue Rotl =
+          DAG.getNode(ISD::ROTL, dl, VT, And, DAG.getConstant(8, dl, SHVT));
+      SDValue Rotr =
+          DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+      SDValue And2 = DAG.getNode(ISD::AND, dl, VT, Rotr,
+                                 DAG.getConstant(0x00FF00FF, dl, VT));
+      return DAG.getNode(ISD::OR, dl, VT, Rotl, And2);
+    }
     Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
     Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
                        DAG.getConstant(0xFF00, dl, VT));
diff --git a/llvm/test/CodeGen/ARM/load-combine-big-endian.ll b/llvm/test/CodeGen/ARM/load-combine-big-endian.ll
index 4b6d14efd0ecb..4f933b1ed780c 100644
--- a/llvm/test/CodeGen/ARM/load-combine-big-endian.ll
+++ b/llvm/test/CodeGen/ARM/load-combine-big-endian.ll
@@ -53,14 +53,12 @@ define i32 @load_i32_by_i8_bswap(ptr %arg) {
 ; BSWAP is not supported by 32 bit target
 ; CHECK-LABEL: load_i32_by_i8_bswap:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r1, #255
 ; CHECK-NEXT:    ldr r0, [r0]
-; CHECK-NEXT:    mov r1, #65280
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    orr r1, r1, #16711680
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap:
@@ -223,21 +221,16 @@ define i32 @load_i32_by_i16_i8(ptr %arg) {
 define i64 @load_i64_by_i8_bswap(ptr %arg) {
 ; CHECK-LABEL: load_i64_by_i8_bswap:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r2, #255
 ; CHECK-NEXT:    ldr r1, [r0]
-; CHECK-NEXT:    mov r12, #65280
 ; CHECK-NEXT:    ldr r0, [r0, #4]
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r3, r12, r0, lsr #8
-; CHECK-NEXT:    orr r3, r3, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    and r2, r12, r1, lsr #8
-; CHECK-NEXT:    orr r0, r0, r3
-; CHECK-NEXT:    and r3, r1, #65280
-; CHECK-NEXT:    orr r2, r2, r1, lsr #24
-; CHECK-NEXT:    lsl r1, r1, #24
-; CHECK-NEXT:    orr r1, r1, r3, lsl #8
-; CHECK-NEXT:    orr r1, r1, r2
+; CHECK-NEXT:    orr r2, r2, #16711680
+; CHECK-NEXT:    and r3, r0, r2
+; CHECK-NEXT:    and r0, r2, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r3, ror #24
+; CHECK-NEXT:    and r3, r1, r2
+; CHECK-NEXT:    and r1, r2, r1, lsr #8
+; CHECK-NEXT:    orr r1, r1, r3, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap:
@@ -377,14 +370,12 @@ define i64 @load_i64_by_i8(ptr %arg) {
 define i32 @load_i32_by_i8_nonzero_offset(ptr %arg) {
 ; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r1, #255
 ; CHECK-NEXT:    ldr r0, [r0, #1]
-; CHECK-NEXT:    mov r1, #65280
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    orr r1, r1, #16711680
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset:
@@ -434,14 +425,12 @@ define i32 @load_i32_by_i8_nonzero_offset(ptr %arg) {
 define i32 @load_i32_by_i8_neg_offset(ptr %arg) {
 ; CHECK-LABEL: load_i32_by_i8_neg_offset:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r1, #255
 ; CHECK-NEXT:    ldr r0, [r0, #-4]
-; CHECK-NEXT:    mov r1, #65280
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    orr r1, r1, #16711680
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset:
@@ -587,14 +576,12 @@ declare i16 @llvm.bswap.i16(i16)
 define i32 @load_i32_by_bswap_i16(ptr %arg) {
 ; CHECK-LABEL: load_i32_by_bswap_i16:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r1, #255
 ; CHECK-NEXT:    ldr r0, [r0]
-; CHECK-NEXT:    mov r1, #65280
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    orr r1, r1, #16711680
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16:
@@ -667,14 +654,12 @@ define i32 @load_i32_by_i8_base_offset_index(ptr %arg, i32 %i) {
 ; CHECK-LABEL: load_i32_by_i8_base_offset_index:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    add r0, r0, r1
-; CHECK-NEXT:    mov r1, #65280
+; CHECK-NEXT:    mov r1, #255
+; CHECK-NEXT:    orr r1, r1, #16711680
 ; CHECK-NEXT:    ldr r0, [r0, #12]
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index:
@@ -733,14 +718,12 @@ define i32 @load_i32_by_i8_base_offset_index_2(ptr %arg, i32 %i) {
 ; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    add r0, r1, r0
-; CHECK-NEXT:    mov r1, #65280
+; CHECK-NEXT:    mov r1, #255
+; CHECK-NEXT:    orr r1, r1, #16711680
 ; CHECK-NEXT:    ldr r0, [r0, #13]
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2:
diff --git a/llvm/test/CodeGen/ARM/load-combine.ll b/llvm/test/CodeGen/ARM/load-combine.ll
index 0f6ec8aa47386..7dcf59a2af18b 100644
--- a/llvm/test/CodeGen/ARM/load-combine.ll
+++ b/llvm/test/CodeGen/ARM/load-combine.ll
@@ -117,14 +117,12 @@ define i32 @load_i32_by_i8_bswap(ptr %arg) {
 ; BSWAP is not supported by 32 bit target
 ; CHECK-LABEL: load_i32_by_i8_bswap:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r1, #255
 ; CHECK-NEXT:    ldr r0, [r0]
-; CHECK-NEXT:    mov r1, #65280
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    orr r1, r1, #16711680
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap:
@@ -237,21 +235,16 @@ define i64 @load_i64_by_i8(ptr %arg) {
 define i64 @load_i64_by_i8_bswap(ptr %arg) {
 ; CHECK-LABEL: load_i64_by_i8_bswap:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r2, #255
 ; CHECK-NEXT:    ldr r1, [r0]
-; CHECK-NEXT:    mov r12, #65280
 ; CHECK-NEXT:    ldr r0, [r0, #4]
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r3, r12, r0, lsr #8
-; CHECK-NEXT:    orr r3, r3, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    and r2, r12, r1, lsr #8
-; CHECK-NEXT:    orr r0, r0, r3
-; CHECK-NEXT:    and r3, r1, #65280
-; CHECK-NEXT:    orr r2, r2, r1, lsr #24
-; CHECK-NEXT:    lsl r1, r1, #24
-; CHECK-NEXT:    orr r1, r1, r3, lsl #8
-; CHECK-NEXT:    orr r1, r1, r2
+; CHECK-NEXT:    orr r2, r2, #16711680
+; CHECK-NEXT:    and r3, r0, r2
+; CHECK-NEXT:    and r0, r2, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r3, ror #24
+; CHECK-NEXT:    and r3, r1, r2
+; CHECK-NEXT:    and r1, r2, r1, lsr #8
+; CHECK-NEXT:    orr r1, r1, r3, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap:
@@ -413,14 +406,12 @@ define i32 @load_i32_by_i8_neg_offset(ptr %arg) {
 define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) {
 ; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r1, #255
 ; CHECK-NEXT:    ldr r0, [r0, #1]
-; CHECK-NEXT:    mov r1, #65280
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    orr r1, r1, #16711680
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset_bswap:
@@ -469,14 +460,12 @@ define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) {
 define i32 @load_i32_by_i8_neg_offset_bswap(ptr %arg) {
 ; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r1, #255
 ; CHECK-NEXT:    ldr r0, [r0, #-4]
-; CHECK-NEXT:    mov r1, #65280
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    orr r1, r1, #16711680
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset_bswap:
@@ -527,14 +516,12 @@ declare i16 @llvm.bswap.i16(i16)
 define i32 @load_i32_by_bswap_i16(ptr %arg) {
 ; CHECK-LABEL: load_i32_by_bswap_i16:
 ; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r1, #255
 ; CHECK-NEXT:    ldr r0, [r0]
-; CHECK-NEXT:    mov r1, #65280
-; CHECK-NEXT:    and r2, r0, #65280
-; CHECK-NEXT:    and r1, r1, r0, lsr #8
-; CHECK-NEXT:    orr r1, r1, r0, lsr #24
-; CHECK-NEXT:    lsl r0, r0, #24
-; CHECK-NEXT:    orr r0, r0, r2, lsl #8
-; CHECK-NEXT:    orr r0, r0, r1
+; CHECK-NEXT:    orr r1, r1, #16711680
+; CHECK-NEXT:    and r2, r0, r1
+; CHECK-NEXT:    and r0, r1, r0, lsr #8
+; CHECK-NEXT:    orr r0, r0, r2, ror #24
 ; CHECK-NEXT:    mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16:
    
    
More information about the llvm-commits
mailing list