[llvm] e18cc52 - [SDAG] try to canonicalize logical shift after bswap

Sanjay Patel via llvm-commits <llvm-commits at lists.llvm.org>
Wed Mar 30 06:30:44 PDT 2022


Author: Sanjay Patel
Date: 2022-03-30T09:29:32-04:00
New Revision: e18cc5277fd8f29da6ba5273a2f4c8359754ccb6

URL: https://github.com/llvm/llvm-project/commit/e18cc5277fd8f29da6ba5273a2f4c8359754ccb6
DIFF: https://github.com/llvm/llvm-project/commit/e18cc5277fd8f29da6ba5273a2f4c8359754ccb6.diff

LOG: [SDAG] try to canonicalize logical shift after bswap

When shifting by a byte-multiple:
bswap (shl X, C) --> lshr (bswap X), C
bswap (lshr X, C) --> shl (bswap X), C
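
For example, with C = 16 on i32, both sides produce the byte-swapped
low half of X, zero-extended into the result. A minimal C++ sanity
check of the two identities (illustrative, not part of this commit;
uses the GCC/Clang builtin __builtin_bswap32):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t X = 0x12345678u;
      // Both identities hold for any byte-multiple shift amount C < 32.
      for (unsigned C = 0; C < 32; C += 8) {
        assert(__builtin_bswap32(X << C) == (__builtin_bswap32(X) >> C));
        assert(__builtin_bswap32(X >> C) == (__builtin_bswap32(X) << C));
      }
      return 0;
    }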

This is the backend version of D122010 and an alternative
suggested in D120648.
Compared to the rough draft, there's an extra check to make sure
the shift amount is valid.

I'm not sure if there is a larger motivating case for RISCV (bug report?),
but the ARM diffs show a benefit from having a late version of the
transform (because we do not combine the loads in IR).

Differential Revision: https://reviews.llvm.org/D122655

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/AArch64/arm64-rev.ll
    llvm/test/CodeGen/AArch64/load-combine-big-endian.ll
    llvm/test/CodeGen/AArch64/load-combine.ll
    llvm/test/CodeGen/ARM/load-combine-big-endian.ll
    llvm/test/CodeGen/ARM/load-combine.ll
    llvm/test/CodeGen/RISCV/bswap-shift.ll
    llvm/test/CodeGen/X86/combine-bswap.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 585872065ad49..05719a92f8370 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9753,6 +9753,21 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) {
     }
   }
 
+  // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
+  // inverse-shift-of-bswap:
+  // bswap (X u<< C) --> (bswap X) u>> C
+  // bswap (X u>> C) --> (bswap X) u<< C
+  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
+      N0.hasOneUse()) {
+    auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+    if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
+        ShAmt->getZExtValue() % 8 == 0) {
+      SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
+      unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
+      return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
+    }
+  }
+
   return SDValue();
 }
 
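Note that the byte-multiple restriction is essential: bswap permutes
whole bytes, so a shift commutes with it (with the direction inverted)
only when the shift amount is a multiple of 8. A small counterexample
sketch (illustrative, not part of this commit):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t X = 0x12345678u;
      // A 4-bit shift crosses byte boundaries, so the fold would be wrong:
      // bswap(X << 4) == 0x80674523, but bswap(X) >> 4 == 0x07856341.
      assert(__builtin_bswap32(X << 4) != (__builtin_bswap32(X) >> 4));
      return 0;
    }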

diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
index f48b46641821e..fe05c3020a4b6 100644
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -653,8 +653,7 @@ define void @test_bswap32_narrow(i32* %p0, i16* %p1) nounwind {
 ; CHECK-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
 ; CHECK-NEXT:    ldrh w8, [x0, #2]
 ; CHECK-NEXT:    mov x19, x1
-; CHECK-NEXT:    lsl w8, w8, #16
-; CHECK-NEXT:    rev w0, w8
+; CHECK-NEXT:    rev16 w0, w8
 ; CHECK-NEXT:    bl gid_tbl_len
 ; CHECK-NEXT:    strh wzr, [x19]
 ; CHECK-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload

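The AArch64 diffs above and below correspond to a source pattern like
the following sketch (function name hypothetical): a zero-extended
16-bit load shifted into the high half and then byte-swapped. With the
shift canonicalized after the bswap, the backend now emits a single
rev16 instead of lsl + rev:

    #include <cstdint>

    // DAG before: bswap (shl (zext X), 16); after: srl (bswap (zext X)), 16.
    uint32_t swap_low_halfword(uint16_t x) {
      return __builtin_bswap32(static_cast<uint32_t>(x) << 16);
    }
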
diff --git a/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll b/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll
index 14a0162d5269c..43e04e341b7e1 100644
--- a/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll
+++ b/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll
@@ -442,8 +442,7 @@ define i32 @zext_load_i32_by_i8(i32* %arg) {
 ; CHECK-LABEL: zext_load_i32_by_i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldrh w8, [x0]
-; CHECK-NEXT:    lsl w8, w8, #16
-; CHECK-NEXT:    rev w0, w8
+; CHECK-NEXT:    rev16 w0, w8
 ; CHECK-NEXT:    ret
   %tmp = bitcast i32* %arg to i8*
   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0

diff --git a/llvm/test/CodeGen/AArch64/load-combine.ll b/llvm/test/CodeGen/AArch64/load-combine.ll
index 066ecb21dc107..06f19d830552a 100644
--- a/llvm/test/CodeGen/AArch64/load-combine.ll
+++ b/llvm/test/CodeGen/AArch64/load-combine.ll
@@ -499,8 +499,7 @@ define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
 ; CHECK-LABEL: zext_load_i32_by_i8_bswap:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldrh w8, [x0]
-; CHECK-NEXT:    lsl w8, w8, #16
-; CHECK-NEXT:    rev w0, w8
+; CHECK-NEXT:    rev16 w0, w8
 ; CHECK-NEXT:    ret
 
   %tmp = bitcast i32* %arg to i8*

diff --git a/llvm/test/CodeGen/ARM/load-combine-big-endian.ll b/llvm/test/CodeGen/ARM/load-combine-big-endian.ll
index 0ed85501a7b67..e8673b91df8cc 100644
--- a/llvm/test/CodeGen/ARM/load-combine-big-endian.ll
+++ b/llvm/test/CodeGen/ARM/load-combine-big-endian.ll
@@ -825,22 +825,19 @@ define i32 @zext_load_i32_by_i8(i32* %arg) {
 ; CHECK-ARMv6-LABEL: zext_load_i32_by_i8:
 ; CHECK-ARMv6:       @ %bb.0:
 ; CHECK-ARMv6-NEXT:    ldrh r0, [r0]
-; CHECK-ARMv6-NEXT:    lsl r0, r0, #16
-; CHECK-ARMv6-NEXT:    rev r0, r0
+; CHECK-ARMv6-NEXT:    rev16 r0, r0
 ; CHECK-ARMv6-NEXT:    bx lr
 ;
 ; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8:
 ; CHECK-THUMBv6:       @ %bb.0:
 ; CHECK-THUMBv6-NEXT:    ldrh r0, [r0]
-; CHECK-THUMBv6-NEXT:    lsls r0, r0, #16
-; CHECK-THUMBv6-NEXT:    rev r0, r0
+; CHECK-THUMBv6-NEXT:    rev16 r0, r0
 ; CHECK-THUMBv6-NEXT:    bx lr
 ;
 ; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8:
 ; CHECK-THUMBv7:       @ %bb.0:
 ; CHECK-THUMBv7-NEXT:    ldrh r0, [r0]
-; CHECK-THUMBv7-NEXT:    lsls r0, r0, #16
-; CHECK-THUMBv7-NEXT:    rev r0, r0
+; CHECK-THUMBv7-NEXT:    rev16 r0, r0
 ; CHECK-THUMBv7-NEXT:    bx lr
 
   %tmp = bitcast i32* %arg to i8*

diff --git a/llvm/test/CodeGen/ARM/load-combine.ll b/llvm/test/CodeGen/ARM/load-combine.ll
index bf03898c891d4..1a4153f8355fa 100644
--- a/llvm/test/CodeGen/ARM/load-combine.ll
+++ b/llvm/test/CodeGen/ARM/load-combine.ll
@@ -875,22 +875,19 @@ define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
 ; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap:
 ; CHECK-ARMv6:       @ %bb.0:
 ; CHECK-ARMv6-NEXT:    ldrh r0, [r0]
-; CHECK-ARMv6-NEXT:    lsl r0, r0, #16
-; CHECK-ARMv6-NEXT:    rev r0, r0
+; CHECK-ARMv6-NEXT:    rev16 r0, r0
 ; CHECK-ARMv6-NEXT:    bx lr
 ;
 ; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_bswap:
 ; CHECK-THUMBv6:       @ %bb.0:
 ; CHECK-THUMBv6-NEXT:    ldrh r0, [r0]
-; CHECK-THUMBv6-NEXT:    lsls r0, r0, #16
-; CHECK-THUMBv6-NEXT:    rev r0, r0
+; CHECK-THUMBv6-NEXT:    rev16 r0, r0
 ; CHECK-THUMBv6-NEXT:    bx lr
 ;
 ; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_bswap:
 ; CHECK-THUMBv7:       @ %bb.0:
 ; CHECK-THUMBv7-NEXT:    ldrh r0, [r0]
-; CHECK-THUMBv7-NEXT:    lsls r0, r0, #16
-; CHECK-THUMBv7-NEXT:    rev r0, r0
+; CHECK-THUMBv7-NEXT:    rev16 r0, r0
 ; CHECK-THUMBv7-NEXT:    bx lr
 
   %tmp = bitcast i32* %arg to i8*

diff --git a/llvm/test/CodeGen/RISCV/bswap-shift.ll b/llvm/test/CodeGen/RISCV/bswap-shift.ll
index 2e1a50be83451..38effeaeede7b 100644
--- a/llvm/test/CodeGen/RISCV/bswap-shift.ll
+++ b/llvm/test/CodeGen/RISCV/bswap-shift.ll
@@ -41,16 +41,12 @@ define i16 @test_bswap_srli_7_bswap_i16(i16 %a) nounwind {
 define i16 @test_bswap_srli_8_bswap_i16(i16 %a) nounwind {
 ; RV32ZB-LABEL: test_bswap_srli_8_bswap_i16:
 ; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    andi a0, a0, 255
-; RV32ZB-NEXT:    rev8 a0, a0
-; RV32ZB-NEXT:    srli a0, a0, 16
+; RV32ZB-NEXT:    slli a0, a0, 8
 ; RV32ZB-NEXT:    ret
 ;
 ; RV64ZB-LABEL: test_bswap_srli_8_bswap_i16:
 ; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    andi a0, a0, 255
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    srli a0, a0, 48
+; RV64ZB-NEXT:    slli a0, a0, 8
 ; RV64ZB-NEXT:    ret
     %1 = call i16 @llvm.bswap.i16(i16 %a)
     %2 = lshr i16 %1, 8
@@ -61,17 +57,12 @@ define i16 @test_bswap_srli_8_bswap_i16(i16 %a) nounwind {
 define i32 @test_bswap_srli_8_bswap_i32(i32 %a) nounwind {
 ; RV32ZB-LABEL: test_bswap_srli_8_bswap_i32:
 ; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    rev8 a0, a0
-; RV32ZB-NEXT:    srli a0, a0, 8
-; RV32ZB-NEXT:    rev8 a0, a0
+; RV32ZB-NEXT:    slli a0, a0, 8
 ; RV32ZB-NEXT:    ret
 ;
 ; RV64ZB-LABEL: test_bswap_srli_8_bswap_i32:
 ; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    srli a0, a0, 40
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    srli a0, a0, 32
+; RV64ZB-NEXT:    slliw a0, a0, 8
 ; RV64ZB-NEXT:    ret
     %1 = call i32 @llvm.bswap.i32(i32 %a)
     %2 = lshr i32 %1, 8
@@ -82,17 +73,12 @@ define i32 @test_bswap_srli_8_bswap_i32(i32 %a) nounwind {
 define i32 @test_bswap_srli_16_bswap_i32(i32 %a) nounwind {
 ; RV32ZB-LABEL: test_bswap_srli_16_bswap_i32:
 ; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    rev8 a0, a0
-; RV32ZB-NEXT:    srli a0, a0, 16
-; RV32ZB-NEXT:    rev8 a0, a0
+; RV32ZB-NEXT:    slli a0, a0, 16
 ; RV32ZB-NEXT:    ret
 ;
 ; RV64ZB-LABEL: test_bswap_srli_16_bswap_i32:
 ; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    srli a0, a0, 48
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    srli a0, a0, 32
+; RV64ZB-NEXT:    slliw a0, a0, 16
 ; RV64ZB-NEXT:    ret
     %1 = call i32 @llvm.bswap.i32(i32 %a)
     %2 = lshr i32 %1, 16
@@ -103,15 +89,12 @@ define i32 @test_bswap_srli_16_bswap_i32(i32 %a) nounwind {
 define i32 @test_bswap_srli_24_bswap_i32(i32 %a) nounwind {
 ; RV32ZB-LABEL: test_bswap_srli_24_bswap_i32:
 ; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    andi a0, a0, 255
-; RV32ZB-NEXT:    rev8 a0, a0
+; RV32ZB-NEXT:    slli a0, a0, 24
 ; RV32ZB-NEXT:    ret
 ;
 ; RV64ZB-LABEL: test_bswap_srli_24_bswap_i32:
 ; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    andi a0, a0, 255
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    srli a0, a0, 32
+; RV64ZB-NEXT:    slliw a0, a0, 24
 ; RV64ZB-NEXT:    ret
     %1 = call i32 @llvm.bswap.i32(i32 %a)
     %2 = lshr i32 %1, 24
@@ -122,17 +105,13 @@ define i32 @test_bswap_srli_24_bswap_i32(i32 %a) nounwind {
 define i64 @test_bswap_srli_48_bswap_i64(i64 %a) nounwind {
 ; RV32ZB-LABEL: test_bswap_srli_48_bswap_i64:
 ; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    rev8 a0, a0
-; RV32ZB-NEXT:    srli a0, a0, 16
-; RV32ZB-NEXT:    rev8 a1, a0
+; RV32ZB-NEXT:    slli a1, a0, 16
 ; RV32ZB-NEXT:    li a0, 0
 ; RV32ZB-NEXT:    ret
 ;
 ; RV64ZB-LABEL: test_bswap_srli_48_bswap_i64:
 ; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    srli a0, a0, 48
-; RV64ZB-NEXT:    rev8 a0, a0
+; RV64ZB-NEXT:    slli a0, a0, 48
 ; RV64ZB-NEXT:    ret
     %1 = call i64 @llvm.bswap.i64(i64 %a)
     %2 = lshr i64 %1, 48
@@ -167,16 +146,14 @@ define i16 @test_bswap_shli_7_bswap_i16(i16 %a) nounwind {
 define i16 @test_bswap_shli_8_bswap_i16(i16 %a) nounwind {
 ; RV32ZB-LABEL: test_bswap_shli_8_bswap_i16:
 ; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    andi a0, a0, -256
-; RV32ZB-NEXT:    rev8 a0, a0
-; RV32ZB-NEXT:    srli a0, a0, 16
+; RV32ZB-NEXT:    slli a0, a0, 16
+; RV32ZB-NEXT:    srli a0, a0, 24
 ; RV32ZB-NEXT:    ret
 ;
 ; RV64ZB-LABEL: test_bswap_shli_8_bswap_i16:
 ; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    andi a0, a0, -256
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    srli a0, a0, 48
+; RV64ZB-NEXT:    slli a0, a0, 48
+; RV64ZB-NEXT:    srli a0, a0, 56
 ; RV64ZB-NEXT:    ret
     %1 = call i16 @llvm.bswap.i16(i16 %a)
     %2 = shl i16 %1, 8
@@ -187,18 +164,12 @@ define i16 @test_bswap_shli_8_bswap_i16(i16 %a) nounwind {
 define i32 @test_bswap_shli_8_bswap_i32(i32 %a) nounwind {
 ; RV32ZB-LABEL: test_bswap_shli_8_bswap_i32:
 ; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    rev8 a0, a0
-; RV32ZB-NEXT:    slli a0, a0, 8
-; RV32ZB-NEXT:    rev8 a0, a0
+; RV32ZB-NEXT:    srli a0, a0, 8
 ; RV32ZB-NEXT:    ret
 ;
 ; RV64ZB-LABEL: test_bswap_shli_8_bswap_i32:
 ; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    srli a0, a0, 24
-; RV64ZB-NEXT:    andi a0, a0, -256
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    srli a0, a0, 32
+; RV64ZB-NEXT:    srliw a0, a0, 8
 ; RV64ZB-NEXT:    ret
     %1 = call i32 @llvm.bswap.i32(i32 %a)
     %2 = shl i32 %1, 8
@@ -209,18 +180,12 @@ define i32 @test_bswap_shli_8_bswap_i32(i32 %a) nounwind {
 define i32 @test_bswap_shli_16_bswap_i32(i32 %a) nounwind {
 ; RV32ZB-LABEL: test_bswap_shli_16_bswap_i32:
 ; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    rev8 a0, a0
-; RV32ZB-NEXT:    slli a0, a0, 16
-; RV32ZB-NEXT:    rev8 a0, a0
+; RV32ZB-NEXT:    srli a0, a0, 16
 ; RV32ZB-NEXT:    ret
 ;
 ; RV64ZB-LABEL: test_bswap_shli_16_bswap_i32:
 ; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    srli a0, a0, 32
-; RV64ZB-NEXT:    slli a0, a0, 16
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    srli a0, a0, 32
+; RV64ZB-NEXT:    srliw a0, a0, 16
 ; RV64ZB-NEXT:    ret
     %1 = call i32 @llvm.bswap.i32(i32 %a)
     %2 = shl i32 %1, 16
@@ -231,17 +196,12 @@ define i32 @test_bswap_shli_16_bswap_i32(i32 %a) nounwind {
 define i32 @test_bswap_shli_24_bswap_i32(i32 %a) nounwind {
 ; RV32ZB-LABEL: test_bswap_shli_24_bswap_i32:
 ; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    lui a1, 1044480
-; RV32ZB-NEXT:    and a0, a0, a1
-; RV32ZB-NEXT:    rev8 a0, a0
+; RV32ZB-NEXT:    srli a0, a0, 24
 ; RV32ZB-NEXT:    ret
 ;
 ; RV64ZB-LABEL: test_bswap_shli_24_bswap_i32:
 ; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    lui a1, 1044480
-; RV64ZB-NEXT:    and a0, a0, a1
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    srli a0, a0, 32
+; RV64ZB-NEXT:    srliw a0, a0, 24
 ; RV64ZB-NEXT:    ret
     %1 = call i32 @llvm.bswap.i32(i32 %a)
     %2 = shl i32 %1, 24
@@ -252,17 +212,13 @@ define i32 @test_bswap_shli_24_bswap_i32(i32 %a) nounwind {
 define i64 @test_bswap_shli_48_bswap_i64(i64 %a) nounwind {
 ; RV32ZB-LABEL: test_bswap_shli_48_bswap_i64:
 ; RV32ZB:       # %bb.0:
-; RV32ZB-NEXT:    rev8 a0, a1
-; RV32ZB-NEXT:    slli a0, a0, 16
-; RV32ZB-NEXT:    rev8 a0, a0
+; RV32ZB-NEXT:    srli a0, a1, 16
 ; RV32ZB-NEXT:    li a1, 0
 ; RV32ZB-NEXT:    ret
 ;
 ; RV64ZB-LABEL: test_bswap_shli_48_bswap_i64:
 ; RV64ZB:       # %bb.0:
-; RV64ZB-NEXT:    rev8 a0, a0
-; RV64ZB-NEXT:    slli a0, a0, 48
-; RV64ZB-NEXT:    rev8 a0, a0
+; RV64ZB-NEXT:    srli a0, a0, 48
 ; RV64ZB-NEXT:    ret
     %1 = call i64 @llvm.bswap.i64(i64 %a)
     %2 = shl i64 %1, 48

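The RISC-V diffs above show the composed effect: in these tests the
shift sits between two bswaps, so once the inner shift is moved past
the outer bswap, the two bswaps become adjacent and cancel, leaving
plain shifts. A sketch of one such pattern (illustrative, not from the
commit):

    #include <cstdint>

    // bswap(bswap(a) >> 8) folds to a << 8: the lshr becomes a shl once
    // moved past the outer bswap, and bswap(bswap(a)) == a cancels.
    uint32_t shift_byte_in_swapped_domain(uint32_t a) {
      return __builtin_bswap32(__builtin_bswap32(a) >> 8);
    }
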
diff --git a/llvm/test/CodeGen/X86/combine-bswap.ll b/llvm/test/CodeGen/X86/combine-bswap.ll
index 0e6fd2cdc39aa..f119bbbaba01b 100644
--- a/llvm/test/CodeGen/X86/combine-bswap.ll
+++ b/llvm/test/CodeGen/X86/combine-bswap.ll
@@ -39,19 +39,18 @@ define i32 @test_bswap_bswap(i32 %a0) nounwind {
   ret i32 %c
 }
 
-; TODO: fold (bswap(srl (bswap c), x)) -> (shl c, x)
 define i16 @test_bswap_srli_8_bswap_i16(i16 %a) nounwind {
 ; X86-LABEL: test_bswap_srli_8_bswap_i16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    rolw $8, %ax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shll $8, %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_bswap_srli_8_bswap_i16:
 ; X64:       # %bb.0:
-; X64-NEXT:    movzbl %dil, %eax
-; X64-NEXT:    rolw $8, %ax
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    shll $8, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
     %1 = call i16 @llvm.bswap.i16(i16 %a)
@@ -64,17 +63,13 @@ define i32 @test_bswap_srli_8_bswap_i32(i32 %a) nounwind {
 ; X86-LABEL: test_bswap_srli_8_bswap_i32:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    shrl $8, %eax
-; X86-NEXT:    bswapl %eax
+; X86-NEXT:    shll $8, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_bswap_srli_8_bswap_i32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    bswapl %eax
-; X64-NEXT:    shrl $8, %eax
-; X64-NEXT:    bswapl %eax
+; X64-NEXT:    shll $8, %eax
 ; X64-NEXT:    retq
     %1 = call i32 @llvm.bswap.i32(i32 %a)
     %2 = lshr i32 %1, 8
@@ -87,20 +82,13 @@ define i64 @test_bswap_srli_16_bswap_i64(i64 %a) nounwind {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    bswapl %edx
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    shrdl $16, %eax, %edx
-; X86-NEXT:    shrl $16, %eax
-; X86-NEXT:    bswapl %edx
-; X86-NEXT:    bswapl %eax
+; X86-NEXT:    shll $16, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_bswap_srli_16_bswap_i64:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    bswapq %rax
-; X64-NEXT:    shrq $16, %rax
-; X64-NEXT:    bswapq %rax
+; X64-NEXT:    shlq $16, %rax
 ; X64-NEXT:    retq
     %1 = call i64 @llvm.bswap.i64(i64 %a)
     %2 = lshr i64 %1, 16
@@ -108,21 +96,17 @@ define i64 @test_bswap_srli_16_bswap_i64(i64 %a) nounwind {
     ret i64 %3
 }
 
-; TODO: fold (bswap(shl (bswap c), x)) -> (srl c, x)
 define i16 @test_bswap_shli_8_bswap_i16(i16 %a) nounwind {
 ; X86-LABEL: test_bswap_shli_8_bswap_i16:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    shll $8, %eax
-; X86-NEXT:    rolw $8, %ax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_bswap_shli_8_bswap_i16:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    andl $65280, %eax # imm = 0xFF00
-; X64-NEXT:    rolw $8, %ax
+; X64-NEXT:    movzbl %ah, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
     %1 = call i16 @llvm.bswap.i16(i16 %a)
@@ -135,17 +119,13 @@ define i32 @test_bswap_shli_8_bswap_i32(i32 %a) nounwind {
 ; X86-LABEL: test_bswap_shli_8_bswap_i32:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    shll $8, %eax
-; X86-NEXT:    bswapl %eax
+; X86-NEXT:    shrl $8, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_bswap_shli_8_bswap_i32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    bswapl %eax
-; X64-NEXT:    shll $8, %eax
-; X64-NEXT:    bswapl %eax
+; X64-NEXT:    shrl $8, %eax
 ; X64-NEXT:    retq
     %1 = call i32 @llvm.bswap.i32(i32 %a)
     %2 = shl i32 %1, 8
@@ -157,21 +137,13 @@ define i64 @test_bswap_shli_16_bswap_i64(i64 %a) nounwind {
 ; X86-LABEL: test_bswap_shli_16_bswap_i64:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    bswapl %ecx
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    shldl $16, %ecx, %eax
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    rolw $8, %cx
-; X86-NEXT:    movzwl %cx, %edx
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_bswap_shli_16_bswap_i64:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    bswapq %rax
-; X64-NEXT:    shlq $16, %rax
-; X64-NEXT:    bswapq %rax
+; X64-NEXT:    shrq $16, %rax
 ; X64-NEXT:    retq
     %1 = call i64 @llvm.bswap.i64(i64 %a)
     %2 = shl i64 %1, 16


        

