[llvm] ba6485e - [SDAG] add demanded bits transform for bswap
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 17 15:33:55 PST 2022
Author: Sanjay Patel
Date: 2022-01-17T18:25:42-05:00
New Revision: ba6485e25fc56468f34cc8a6938d66d3c5f46596
URL: https://github.com/llvm/llvm-project/commit/ba6485e25fc56468f34cc8a6938d66d3c5f46596
DIFF: https://github.com/llvm/llvm-project/commit/ba6485e25fc56468f34cc8a6938d66d3c5f46596.diff
LOG: [SDAG] add demanded bits transform for bswap
A possible codegen regression for PowerPC is noted in D117406
because we don't recognize a pattern that demands only 1 byte
from a bswap.
This fold has existed in IR since close to the beginning of LLVM:
https://github.com/llvm/llvm-project/blame/main/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp#L794
...so this patch copies that code as much as possible and adapts
it for SDAG.
The test for PowerPC that would change in D117406 is over-reduced
with undefs, so I recreated it for AArch64 and x86 by passing in
pointer args and renamed the values to make the logic clearer.
Differential Revision: https://reviews.llvm.org/D117508
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/AArch64/bswap-known-bits.ll
llvm/test/CodeGen/X86/combine-bswap.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index dde4cb7820e0d..ea6a7e16bcdbd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1806,6 +1806,35 @@ bool TargetLowering::SimplifyDemandedBits(
}
case ISD::BSWAP: {
SDValue Src = Op.getOperand(0);
+
+ // If the only bits demanded come from one byte of the bswap result,
+ // just shift the input byte into position to eliminate the bswap.
+ unsigned NLZ = DemandedBits.countLeadingZeros();
+ unsigned NTZ = DemandedBits.countTrailingZeros();
+
+ // Round NTZ and NLZ down to a multiple of 8 (a byte boundary). For example,
+ // with 11 trailing zeros we still need all the bits down to bit 8, so NTZ
+ // becomes 8. Likewise, 14 leading zeros round down to 8.
+ NLZ &= ~7;
+ NTZ &= ~7;
+ // If we need exactly one byte, we can do this transformation.
+ if (BitWidth - NLZ - NTZ == 8) {
+ unsigned ResultBit = NTZ;
+ unsigned InputBit = BitWidth - NTZ - 8;
+
+ // Replace this with either a left or right shift to get the byte into
+ // the right place.
+ unsigned ShiftOpcode = InputBit > ResultBit ? ISD::SRL : ISD::SHL;
+ if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
+ EVT ShiftAmtTy = getShiftAmountTy(VT, DL);
+ unsigned ShiftAmount =
+ InputBit > ResultBit ? InputBit - ResultBit : ResultBit - InputBit;
+ SDValue ShAmt = TLO.DAG.getConstant(ShiftAmount, dl, ShiftAmtTy);
+ SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
APInt DemandedSrcBits = DemandedBits.byteSwap();
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
Depth + 1))
diff --git a/llvm/test/CodeGen/AArch64/bswap-known-bits.ll b/llvm/test/CodeGen/AArch64/bswap-known-bits.ll
index 9048fc9aa72c4..442caf7d9b8e8 100644
--- a/llvm/test/CodeGen/AArch64/bswap-known-bits.ll
+++ b/llvm/test/CodeGen/AArch64/bswap-known-bits.ll
@@ -66,7 +66,7 @@ define i8 @demand_one_byte0(i32 %x) {
define i32 @demand_one_byte1(i32 %x) {
; CHECK-LABEL: demand_one_byte1:
; CHECK: ; %bb.0:
-; CHECK-NEXT: rev w8, w0
+; CHECK-NEXT: lsr w8, w0, #8
; CHECK-NEXT: and w0, w8, #0xff00
; CHECK-NEXT: ret
%b = call i32 @llvm.bswap.i32(i32 %x)
@@ -77,7 +77,7 @@ define i32 @demand_one_byte1(i32 %x) {
define i32 @demand_one_byte2(i32 %x) {
; CHECK-LABEL: demand_one_byte2:
; CHECK: ; %bb.0:
-; CHECK-NEXT: rev w8, w0
+; CHECK-NEXT: lsl w8, w0, #8
; CHECK-NEXT: orr w0, w8, #0xff00ffff
; CHECK-NEXT: ret
%b = call i32 @llvm.bswap.i32(i32 %x)
@@ -88,8 +88,7 @@ define i32 @demand_one_byte2(i32 %x) {
define i64 @demand_one_byte3(i64 %x) {
; CHECK-LABEL: demand_one_byte3:
; CHECK: ; %bb.0:
-; CHECK-NEXT: rev x8, x0
-; CHECK-NEXT: lsr x0, x8, #56
+; CHECK-NEXT: and x0, x0, #0xff
; CHECK-NEXT: ret
%b = call i64 @llvm.bswap.i64(i64 %x)
%r = lshr i64 %b, 56
@@ -99,9 +98,7 @@ define i64 @demand_one_byte3(i64 %x) {
define void @demand_one_loaded_byte(i64* %xp, i32* %yp) {
; CHECK-LABEL: demand_one_loaded_byte:
; CHECK: ; %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: lsr x8, x8, #8
-; CHECK-NEXT: rev w8, w8
+; CHECK-NEXT: ldrb w8, [x0, #4]
; CHECK-NEXT: strb w8, [x1]
; CHECK-NEXT: ret
%x = load i64, i64* %xp, align 8
diff --git a/llvm/test/CodeGen/X86/combine-bswap.ll b/llvm/test/CodeGen/X86/combine-bswap.ll
index 1d02e30fa2b41..4fbb3bf98171f 100644
--- a/llvm/test/CodeGen/X86/combine-bswap.ll
+++ b/llvm/test/CodeGen/X86/combine-bswap.ll
@@ -62,18 +62,13 @@ define void @demand_one_loaded_byte(i64* %xp, i32* %yp) {
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %edx
-; X86-NEXT: movl 4(%ecx), %ecx
-; X86-NEXT: shldl $24, %edx, %ecx
-; X86-NEXT: bswapl %ecx
+; X86-NEXT: movb 4(%ecx), %cl
; X86-NEXT: movb %cl, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: demand_one_loaded_byte:
; X64: # %bb.0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: shrq $8, %rax
-; X64-NEXT: bswapl %eax
+; X64-NEXT: movb 4(%rdi), %al
; X64-NEXT: movb %al, (%rsi)
; X64-NEXT: retq
%x = load i64, i64* %xp, align 8
More information about the llvm-commits mailing list