[llvm] r318390 - [DAGCombine] Enable more srl -> load combines
Sam Parker via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 16 03:28:26 PST 2017
Author: sam_parker
Date: Thu Nov 16 03:28:26 2017
New Revision: 318390
URL: http://llvm.org/viewvc/llvm-project?rev=318390&view=rev
Log:
[DAGCombine] Enable more srl -> load combines
Change the calculation for the desired ValueType for non-sign
extending loads, as in those cases we don't care about the
higher bits. This creates a smaller ExtVT and allows for such
combinations as:
(srl (zextload i16, [addr]), 8) -> (zextload i8, [addr + 1])
Differential Revision: https://reviews.llvm.org/D40034
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/ARM/shift-combine.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=318390&r1=318389&r2=318390&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Nov 16 03:28:26 2017
@@ -8041,13 +8041,24 @@ SDValue DAGCombiner::ReduceLoadWidth(SDN
ExtType = ISD::SEXTLOAD;
ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
} else if (Opc == ISD::SRL) {
- // Another special-case: SRL is basically zero-extending a narrower value.
+ // Another special-case: SRL is basically zero-extending a narrower value,
+ // or it may be shifting a higher subword, half or byte into the lowest
+ // bits.
ExtType = ISD::ZEXTLOAD;
N0 = SDValue(N, 0);
- ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
- if (!N01) return SDValue();
- ExtVT = EVT::getIntegerVT(*DAG.getContext(),
- VT.getSizeInBits() - N01->getZExtValue());
+
+ auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
+ auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!N01 || !LN0)
+ return SDValue();
+
+ uint64_t ShiftAmt = N01->getZExtValue();
+ uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
+ if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
+ else
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(),
+ VT.getSizeInBits() - ShiftAmt);
}
if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
return SDValue();
Modified: llvm/trunk/test/CodeGen/ARM/shift-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/shift-combine.ll?rev=318390&r1=318389&r2=318390&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/shift-combine.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/shift-combine.ll Thu Nov 16 03:28:26 2017
@@ -1,15 +1,20 @@
-; RUN: llc -mtriple=armv7-linux-gnueabihf %s -o - | FileCheck %s
+; RUN: llc -mtriple=armv7-linux-gnueabihf %s -o - | FileCheck %s --check-prefix=CHECK-ARM --check-prefix=CHECK-COMMON
+; RUN: llc -mtriple=armv7eb-linux-gnueabihf %s -o - | FileCheck %s --check-prefix=CHECK-BE
+; RUN: llc -mtriple=thumbv7-linux-gnueabihf %s -o - | FileCheck %s --check-prefix=CHECK-THUMB --check-prefix=CHECK-COMMON
+; RUN: llc -mtriple=thumbv7m %s -o - | FileCheck %s --check-prefix=CHECK-THUMB --check-prefix=CHECK-COMMON
+; RUN: llc -mtriple=thumbv7m -mattr=+strict-align %s -o - | FileCheck %s --check-prefix=CHECK-ALIGN --check-prefix=CHECK-COMMON
+; RUN: llc -mtriple=thumbv6m %s -o - | FileCheck %s --check-prefix=CHECK-V6M
@array = weak global [4 x i32] zeroinitializer
define i32 @test_lshr_and1(i32 %x) {
entry:
;CHECK-LABLE: test_lshr_and1:
-;CHECK: movw r1, :lower16:array
-;CHECK-NEXT: and r0, r0, #12
-;CHECK-NEXT: movt r1, :upper16:array
-;CHECK-NEXT: ldr r0, [r1, r0]
-;CHECK-NEXT: bx lr
+;CHECK-COMMON: movw r1, :lower16:array
+;CHECK-COMMON-NEXT: and r0, r0, #12
+;CHECK-COMMON-NEXT: movt r1, :upper16:array
+;CHECK-COMMON-NEXT: ldr r0, [r1, r0]
+;CHECK-COMMON-NEXT: bx lr
%tmp2 = lshr i32 %x, 2
%tmp3 = and i32 %tmp2, 3
%tmp4 = getelementptr [4 x i32], [4 x i32]* @array, i32 0, i32 %tmp3
@@ -18,10 +23,11 @@ entry:
}
define i32 @test_lshr_and2(i32 %x) {
entry:
-;CHECK-LABLE: test_lshr_and2:
-;CHECK: ubfx r0, r0, #1, #15
-;CHECK-NEXT: add r0, r0, r0
-;CHECK-NEXT: bx lr
+;CHECK-LABEL: test_lshr_and2:
+;CHECK-COMMON: ubfx r0, r0, #1, #15
+;CHECK-ARM: add r0, r0, r0
+;CHECK-THUMB: add r0, r0
+;CHECK-COMMON: bx lr
%a = and i32 %x, 65534
%b = lshr i32 %a, 1
%c = and i32 %x, 65535
@@ -29,3 +35,185 @@ entry:
%e = add i32 %b, %d
ret i32 %e
}
+
+; CHECK-LABEL: test_lshr_load1
+; CHECK-BE: ldrb r0, [r0]
+; CHECK-COMMON: ldrb r0, [r0, #1]
+; CHECK-COMMON-NEXT: bx
+define arm_aapcscc i32 @test_lshr_load1(i16* %a) {
+entry:
+ %0 = load i16, i16* %a, align 2
+ %conv1 = zext i16 %0 to i32
+ %1 = lshr i32 %conv1, 8
+ ret i32 %1
+}
+
+; CHECK-LABEL: test_lshr_load1_sext
+; CHECK-ARM: ldrsh r0, [r0]
+; CHECK-ARM-NEXT: lsr r0, r0, #8
+; CHECK-THUMB: ldrsh.w r0, [r0]
+; CHECK-THUMB-NEXT: lsrs r0, r0, #8
+; CHECK-COMMON: bx
+define arm_aapcscc i32 @test_lshr_load1_sext(i16* %a) {
+entry:
+ %0 = load i16, i16* %a, align 2
+ %conv1 = sext i16 %0 to i32
+ %1 = lshr i32 %conv1, 8
+ ret i32 %1
+}
+
+; CHECK-LABEL: test_lshr_load1_fail
+; CHECK-COMMON: ldrh r0, [r0]
+; CHECK-ARM: lsr r0, r0, #9
+; CHECK-THUMB: lsrs r0, r0, #9
+; CHECK-COMMON: bx
+define arm_aapcscc i32 @test_lshr_load1_fail(i16* %a) {
+entry:
+ %0 = load i16, i16* %a, align 2
+ %conv1 = zext i16 %0 to i32
+ %1 = lshr i32 %conv1, 9
+ ret i32 %1
+}
+
+; CHECK-LABEL: test_lshr_load32
+; CHECK-COMMON: ldr r0, [r0]
+; CHECK-ARM: lsr r0, r0, #8
+; CHECK-THUMB: lsrs r0, r0, #8
+; CHECK-COMMON: bx
+define arm_aapcscc i32 @test_lshr_load32(i32* %a) {
+entry:
+ %0 = load i32, i32* %a, align 4
+ %1 = lshr i32 %0, 8
+ ret i32 %1
+}
+
+; CHECK-LABEL: test_lshr_load32_2
+; CHECK-BE: ldrh r0, [r0]
+; CHECK-COMMON: ldrh r0, [r0, #2]
+; CHECK-COMMON-NEXT: bx
+define arm_aapcscc i32 @test_lshr_load32_2(i32* %a) {
+entry:
+ %0 = load i32, i32* %a, align 4
+ %1 = lshr i32 %0, 16
+ ret i32 %1
+}
+
+; CHECK-LABEL: test_lshr_load32_1
+; CHECK-BE: ldrb r0, [r0]
+; CHECK-COMMON: ldrb r0, [r0, #3]
+; CHECK-COMMON-NEXT: bx
+define arm_aapcscc i32 @test_lshr_load32_1(i32* %a) {
+entry:
+ %0 = load i32, i32* %a, align 4
+ %1 = lshr i32 %0, 24
+ ret i32 %1
+}
+
+; CHECK-LABEL: test_lshr_load32_fail
+; CHECK-BE: ldr r0, [r0]
+; CHECK-BE-NEXT: lsr r0, r0, #15
+; CHECK-COMMON: ldr r0, [r0]
+; CHECK-ARM: lsr r0, r0, #15
+; CHECK-THUMB: lsrs r0, r0, #15
+; CHECK-COMMON: bx
+define arm_aapcscc i32 @test_lshr_load32_fail(i32* %a) {
+entry:
+ %0 = load i32, i32* %a, align 4
+ %1 = lshr i32 %0, 15
+ ret i32 %1
+}
+
+; CHECK-LABEL: test_lshr_load64_4_unaligned
+; CHECK-BE: ldr [[HIGH:r[0-9]+]], [r0]
+; CHECK-BE-NEXT: ldrh [[LOW:r[0-9]+]], [r0, #4]
+; CHECK-BE-NEXT: orr r0, [[LOW]], [[HIGH]], lsl #16
+; CHECK-V6M: ldrh [[LOW:r[0-9]+]], [r0, #2]
+; CHECK-V6M: ldr [[HIGH:r[0-9]+]], [r0, #4]
+; CHECK-V6M-NEXT: lsls [[HIGH]], [[HIGH]], #16
+; CHECK-V6M-NEXT: orrs r0, r1
+; CHECK-ALIGN: ldr [[HIGH:r[0-9]+]], [r0, #4]
+; CHECK-ALIGN-NEXT: ldrh [[LOW:r[0-9]+]], [r0, #2]
+; CHECK-ALIGN-NEXT: orr.w r0, [[LOW]], [[HIGH]], lsl #16
+; CHECK-ARM: ldr r0, [r0, #2]
+; CHECK-THUMB: ldr.w r0, [r0, #2]
+; CHECK-COMMON: bx
+define arm_aapcscc i32 @test_lshr_load64_4_unaligned(i64* %a) {
+entry:
+ %0 = load i64, i64* %a, align 8
+ %1 = lshr i64 %0, 16
+ %conv = trunc i64 %1 to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: test_lshr_load64_1_lsb
+; CHECK-BE: ldr r1, [r0]
+; CHECK-BE-NEXT: ldrb r0, [r0, #4]
+; CHECK-BE-NEXT: orr r0, r0, r1, lsl #8
+; CHECK-ARM: ldr r0, [r0, #3]
+; CHECK-THUMB: ldr.w r0, [r0, #3]
+; CHECK-ALIGN: ldr [[HIGH:r[0-9]+]], [r0, #4]
+; CHECK-ALIGN-NEXT: ldrb [[LOW:r[0-9]+]], [r0, #3]
+; CHECK-ALIGN-NEXT: orr.w r0, [[LOW]], [[HIGH]], lsl #8
+; CHECK-COMMON: bx
+define arm_aapcscc i32 @test_lshr_load64_1_lsb(i64* %a) {
+entry:
+ %0 = load i64, i64* %a, align 8
+ %1 = lshr i64 %0, 24
+ %conv = trunc i64 %1 to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: test_lshr_load64_1_msb
+; CHECK-BE: ldrb r0, [r0]
+; CHECK-BE-NEXT: bx
+; CHECK-COMMON: ldrb r0, [r0, #7]
+; CHECK-COMMON-NEXT: bx
+define arm_aapcscc i32 @test_lshr_load64_1_msb(i64* %a) {
+entry:
+ %0 = load i64, i64* %a, align 8
+ %1 = lshr i64 %0, 56
+ %conv = trunc i64 %1 to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: test_lshr_load64_4
+; CHECK-BE: ldr r0, [r0]
+; CHECK-BE-NEXT: bx
+; CHECK-COMMON: ldr r0, [r0, #4]
+; CHECK-COMMON-NEXT: bx
+define arm_aapcscc i32 @test_lshr_load64_4(i64* %a) {
+entry:
+ %0 = load i64, i64* %a, align 8
+ %1 = lshr i64 %0, 32
+ %conv = trunc i64 %1 to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: test_lshr_load64_2
+; CHECK-BE: ldrh r0, [r0]
+; CHECK-BE-NEXT: bx
+; CHECK-COMMON: ldrh r0, [r0, #6]
+; CHECK-COMMON-NEXT:bx
+define arm_aapcscc i32 @test_lshr_load64_2(i64* %a) {
+entry:
+ %0 = load i64, i64* %a, align 8
+ %1 = lshr i64 %0, 48
+ %conv = trunc i64 %1 to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: test_lshr_load4_fail
+; CHECK-COMMON: ldrd r0, r1, [r0]
+; CHECK-ARM: lsr r0, r0, #8
+; CHECK-ARM-NEXT: orr r0, r0, r1, lsl #24
+; CHECK-THUMB: lsrs r0, r0, #8
+; CHECK-THUMB-NEXT: orr.w r0, r0, r1, lsl #24
+; CHECK-COMMON: bx
+define arm_aapcscc i32 @test_lshr_load4_fail(i64* %a) {
+entry:
+ %0 = load i64, i64* %a, align 8
+ %1 = lshr i64 %0, 8
+ %conv = trunc i64 %1 to i32
+ ret i32 %conv
+}
+
More information about the llvm-commits mailing list