[llvm] 88e2055 - Revert "[DAGCombiner] Allow zextended load combines."
Clement Courbet via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 22 00:01:33 PST 2019
Author: Clement Courbet
Date: 2019-11-22T09:01:08+01:00
New Revision: 88e205525ca3b65f9b129d87e76e6e94c7ed032f
URL: https://github.com/llvm/llvm-project/commit/88e205525ca3b65f9b129d87e76e6e94c7ed032f
DIFF: https://github.com/llvm/llvm-project/commit/88e205525ca3b65f9b129d87e76e6e94c7ed032f.diff
LOG: Revert "[DAGCombiner] Allow zextended load combines."
Breaks some bots.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AArch64/load-combine-big-endian.ll
llvm/test/CodeGen/AArch64/load-combine.ll
llvm/test/CodeGen/ARM/load-combine-big-endian.ll
llvm/test/CodeGen/ARM/load-combine.ll
llvm/test/CodeGen/X86/load-combine.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 71ee8e141bfa..fb7ddf5b2339 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6750,6 +6750,12 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
return SDValue();
unsigned ByteWidth = VT.getSizeInBits() / 8;
+ // Before legalize we can introduce too wide illegal loads which will be later
+ // split into legal sized loads. This enables us to combine i64 load by i8
+ // patterns to a couple of i32 loads on 32 bit targets.
+ if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
+ return SDValue();
+
bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
auto MemoryByteOffset = [&] (ByteProvider P) {
assert(P.isMemory() && "Must be a memory byte provider");
@@ -6772,21 +6778,11 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// Check if all the bytes of the OR we are looking at are loaded from the same
// base address. Collect bytes offsets from Base address in ByteOffsets.
SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
- unsigned ZeroExtendedBytes = 0;
- for (int i = ByteWidth - 1; i >= 0; --i) {
+ for (unsigned i = 0; i < ByteWidth; i++) {
auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
- if (!P)
+ if (!P || !P->isMemory()) // All the bytes must be loaded from memory
return SDValue();
- if (P->isConstantZero()) {
- // It's OK for the N most significant bytes to be 0, we can just
- // zero-extend the load.
- if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
- return SDValue();
- continue;
- }
- assert(P->isMemory() && "provenance should either be memory or zero");
-
LoadSDNode *L = P->Load;
assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
!L->isIndexed() &&
@@ -6825,23 +6821,9 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
assert(Base && "Base address of the accessed memory location must be set");
assert(FirstOffset != INT64_MAX && "First byte offset must be set");
- bool NeedsZext = ZeroExtendedBytes > 0;
-
- EVT MemVT =
- EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
-
- // Before legalize we can introduce too wide illegal loads which will be later
- // split into legal sized loads. This enables us to combine i64 load by i8
- // patterns to a couple of i32 loads on 32 bit targets.
- if (LegalOperations &&
- !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
- MemVT))
- return SDValue();
-
// Check if the bytes of the OR we are looking at match with either big or
// little endian value load
- Optional<bool> IsBigEndian = isBigEndian(
- makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
+ Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
if (!IsBigEndian.hasValue())
return SDValue();
@@ -6854,8 +6836,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
LoadSDNode *FirstLoad = FirstByteProvider->Load;
// The node we are looking at matches with the pattern, check if we can
- // replace it with a single (possibly zero-extended) load and bswap + shift if
- // needed.
+ // replace it with a single load and bswap if needed.
// If the load needs byte swap check if the target supports it
bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
@@ -6863,45 +6844,25 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// Before legalize we can introduce illegal bswaps which will be later
// converted to an explicit bswap sequence. This way we end up with a single
// load and byte shuffling instead of several loads and byte shuffling.
- // We do not introduce illegal bswaps when zero-extending as this tends to
- // introduce too many arithmetic instructions.
- if (NeedsBswap && (LegalOperations || NeedsZext) &&
- !TLI.isOperationLegal(ISD::BSWAP, VT))
- return SDValue();
-
- // If we need to bswap and zero extend, we have to insert a shift. Check that
- // it is legal.
- if (NeedsBswap && NeedsZext && LegalOperations &&
- !TLI.isOperationLegal(ISD::SHL, VT))
+ if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
return SDValue();
// Check that a load of the wide type is both allowed and fast on the target
bool Fast = false;
- bool Allowed =
- TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
- *FirstLoad->getMemOperand(), &Fast);
+ bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+ VT, *FirstLoad->getMemOperand(), &Fast);
if (!Allowed || !Fast)
return SDValue();
- SDValue NewLoad = DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
- SDLoc(N), VT, Chain, FirstLoad->getBasePtr(),
- FirstLoad->getPointerInfo(), MemVT,
- FirstLoad->getAlignment());
+ SDValue NewLoad =
+ DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
// Transfer chain users from old loads to the new load.
for (LoadSDNode *L : Loads)
DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
- if (!NeedsBswap)
- return NewLoad;
-
- SDValue ShiftedLoad =
- NeedsZext
- ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
- DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
- SDLoc(N), LegalOperations))
- : NewLoad;
- return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
+ return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
}
// If the target has andn, bsl, or a similar bit-select instruction,
diff --git a/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll b/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll
index 19de95198c19..426bb880ed1b 100644
--- a/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll
+++ b/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll
@@ -445,9 +445,10 @@ define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
define i32 @zext_load_i32_by_i8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: lsl w8, w8, #16
-; CHECK-NEXT: rev w0, w8
+; CHECK-NEXT: ldrb w8, [x0]
+; CHECK-NEXT: ldrb w9, [x0, #1]
+; CHECK-NEXT: bfi w8, w9, #8, #8
+; CHECK-NEXT: mov w0, w8
; CHECK-NEXT: ret
%tmp = bitcast i32* %arg to i8*
@@ -514,7 +515,10 @@ define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) {
define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldrh w0, [x0]
+; CHECK-NEXT: ldrb w8, [x0, #1]
+; CHECK-NEXT: ldrb w9, [x0]
+; CHECK-NEXT: bfi w8, w9, #8, #8
+; CHECK-NEXT: mov w0, w8
; CHECK-NEXT: ret
%tmp = bitcast i32* %arg to i8*
diff --git a/llvm/test/CodeGen/AArch64/load-combine.ll b/llvm/test/CodeGen/AArch64/load-combine.ll
index 066ecb21dc10..906646cda15e 100644
--- a/llvm/test/CodeGen/AArch64/load-combine.ll
+++ b/llvm/test/CodeGen/AArch64/load-combine.ll
@@ -431,7 +431,10 @@ define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
define i32 @zext_load_i32_by_i8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldrh w0, [x0]
+; CHECK-NEXT: ldrb w8, [x0]
+; CHECK-NEXT: ldrb w9, [x0, #1]
+; CHECK-NEXT: bfi w8, w9, #8, #8
+; CHECK-NEXT: mov w0, w8
; CHECK-NEXT: ret
%tmp = bitcast i32* %arg to i8*
@@ -498,9 +501,10 @@ define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) {
define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: lsl w8, w8, #16
-; CHECK-NEXT: rev w0, w8
+; CHECK-NEXT: ldrb w8, [x0, #1]
+; CHECK-NEXT: ldrb w9, [x0]
+; CHECK-NEXT: bfi w8, w9, #8, #8
+; CHECK-NEXT: mov w0, w8
; CHECK-NEXT: ret
%tmp = bitcast i32* %arg to i8*
diff --git a/llvm/test/CodeGen/ARM/load-combine-big-endian.ll b/llvm/test/CodeGen/ARM/load-combine-big-endian.ll
index 0ed85501a7b6..d045f1f96ee3 100644
--- a/llvm/test/CodeGen/ARM/load-combine-big-endian.ll
+++ b/llvm/test/CodeGen/ARM/load-combine-big-endian.ll
@@ -824,23 +824,25 @@ define i32 @zext_load_i32_by_i8(i32* %arg) {
;
; CHECK-ARMv6-LABEL: zext_load_i32_by_i8:
; CHECK-ARMv6: @ %bb.0:
-; CHECK-ARMv6-NEXT: ldrh r0, [r0]
-; CHECK-ARMv6-NEXT: lsl r0, r0, #16
-; CHECK-ARMv6-NEXT: rev r0, r0
+; CHECK-ARMv6-NEXT: ldrb r1, [r0]
+; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1]
+; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #8
; CHECK-ARMv6-NEXT: bx lr
;
; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8:
; CHECK-THUMBv6: @ %bb.0:
-; CHECK-THUMBv6-NEXT: ldrh r0, [r0]
-; CHECK-THUMBv6-NEXT: lsls r0, r0, #16
-; CHECK-THUMBv6-NEXT: rev r0, r0
+; CHECK-THUMBv6-NEXT: ldrb r1, [r0]
+; CHECK-THUMBv6-NEXT: ldrb r0, [r0, #1]
+; CHECK-THUMBv6-NEXT: lsls r0, r0, #8
+; CHECK-THUMBv6-NEXT: adds r0, r0, r1
; CHECK-THUMBv6-NEXT: bx lr
;
; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8:
; CHECK-THUMBv7: @ %bb.0:
-; CHECK-THUMBv7-NEXT: ldrh r0, [r0]
-; CHECK-THUMBv7-NEXT: lsls r0, r0, #16
-; CHECK-THUMBv7-NEXT: rev r0, r0
+; CHECK-THUMBv7-NEXT: ldrb r1, [r0]
+; CHECK-THUMBv7-NEXT: ldrb r0, [r0, #1]
+; CHECK-THUMBv7-NEXT: lsls r0, r0, #8
+; CHECK-THUMBv7-NEXT: adds r0, r0, r1
; CHECK-THUMBv7-NEXT: bx lr
%tmp = bitcast i32* %arg to i8*
@@ -960,22 +962,32 @@ define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) {
define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap:
; CHECK: @ %bb.0:
-; CHECK-NEXT: ldrh r0, [r0]
+; CHECK-NEXT: ldrb r1, [r0]
+; CHECK-NEXT: ldrb r0, [r0, #1]
+; CHECK-NEXT: orr r0, r0, r1, lsl #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap:
; CHECK-ARMv6: @ %bb.0:
-; CHECK-ARMv6-NEXT: ldrh r0, [r0]
+; CHECK-ARMv6-NEXT: ldrb r1, [r0]
+; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1]
+; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #8
; CHECK-ARMv6-NEXT: bx lr
;
; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_bswap:
; CHECK-THUMBv6: @ %bb.0:
-; CHECK-THUMBv6-NEXT: ldrh r0, [r0]
+; CHECK-THUMBv6-NEXT: ldrb r1, [r0, #1]
+; CHECK-THUMBv6-NEXT: ldrb r0, [r0]
+; CHECK-THUMBv6-NEXT: lsls r0, r0, #8
+; CHECK-THUMBv6-NEXT: adds r0, r0, r1
; CHECK-THUMBv6-NEXT: bx lr
;
; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_bswap:
; CHECK-THUMBv7: @ %bb.0:
-; CHECK-THUMBv7-NEXT: ldrh r0, [r0]
+; CHECK-THUMBv7-NEXT: ldrb r1, [r0, #1]
+; CHECK-THUMBv7-NEXT: ldrb r0, [r0]
+; CHECK-THUMBv7-NEXT: lsls r0, r0, #8
+; CHECK-THUMBv7-NEXT: adds r0, r0, r1
; CHECK-THUMBv7-NEXT: bx lr
%tmp = bitcast i32* %arg to i8*
diff --git a/llvm/test/CodeGen/ARM/load-combine.ll b/llvm/test/CodeGen/ARM/load-combine.ll
index bf03898c891d..d173a098b9bf 100644
--- a/llvm/test/CodeGen/ARM/load-combine.ll
+++ b/llvm/test/CodeGen/ARM/load-combine.ll
@@ -734,22 +734,31 @@ define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
define i32 @zext_load_i32_by_i8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8:
; CHECK: @ %bb.0:
-; CHECK-NEXT: ldrh r0, [r0]
+; CHECK-NEXT: ldrb r1, [r0]
+; CHECK-NEXT: ldrb r0, [r0, #1]
+; CHECK-NEXT: orr r0, r1, r0, lsl #8
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: zext_load_i32_by_i8:
; CHECK-ARMv6: @ %bb.0:
-; CHECK-ARMv6-NEXT: ldrh r0, [r0]
+; CHECK-ARMv6-NEXT: ldrb r1, [r0]
+; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1]
+; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #8
; CHECK-ARMv6-NEXT: bx lr
;
; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8:
; CHECK-THUMBv6: @ %bb.0:
-; CHECK-THUMBv6-NEXT: ldrh r0, [r0]
+; CHECK-THUMBv6-NEXT: ldrb r1, [r0]
+; CHECK-THUMBv6-NEXT: ldrb r0, [r0, #1]
+; CHECK-THUMBv6-NEXT: lsls r0, r0, #8
+; CHECK-THUMBv6-NEXT: adds r0, r0, r1
; CHECK-THUMBv6-NEXT: bx lr
;
; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8:
; CHECK-THUMBv7: @ %bb.0:
-; CHECK-THUMBv7-NEXT: ldrh r0, [r0]
+; CHECK-THUMBv7-NEXT: ldrb r1, [r0]
+; CHECK-THUMBv7-NEXT: ldrb r0, [r0, #1]
+; CHECK-THUMBv7-NEXT: orr.w r0, r1, r0, lsl #8
; CHECK-THUMBv7-NEXT: bx lr
%tmp = bitcast i32* %arg to i8*
@@ -874,23 +883,24 @@ define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
;
; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap:
; CHECK-ARMv6: @ %bb.0:
-; CHECK-ARMv6-NEXT: ldrh r0, [r0]
-; CHECK-ARMv6-NEXT: lsl r0, r0, #16
-; CHECK-ARMv6-NEXT: rev r0, r0
+; CHECK-ARMv6-NEXT: ldrb r1, [r0]
+; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1]
+; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #8
; CHECK-ARMv6-NEXT: bx lr
;
; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_bswap:
; CHECK-THUMBv6: @ %bb.0:
-; CHECK-THUMBv6-NEXT: ldrh r0, [r0]
-; CHECK-THUMBv6-NEXT: lsls r0, r0, #16
-; CHECK-THUMBv6-NEXT: rev r0, r0
+; CHECK-THUMBv6-NEXT: ldrb r1, [r0, #1]
+; CHECK-THUMBv6-NEXT: ldrb r0, [r0]
+; CHECK-THUMBv6-NEXT: lsls r0, r0, #8
+; CHECK-THUMBv6-NEXT: adds r0, r0, r1
; CHECK-THUMBv6-NEXT: bx lr
;
; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_bswap:
; CHECK-THUMBv7: @ %bb.0:
-; CHECK-THUMBv7-NEXT: ldrh r0, [r0]
-; CHECK-THUMBv7-NEXT: lsls r0, r0, #16
-; CHECK-THUMBv7-NEXT: rev r0, r0
+; CHECK-THUMBv7-NEXT: ldrb r1, [r0]
+; CHECK-THUMBv7-NEXT: ldrb r0, [r0, #1]
+; CHECK-THUMBv7-NEXT: orr.w r0, r0, r1, lsl #8
; CHECK-THUMBv7-NEXT: bx lr
%tmp = bitcast i32* %arg to i8*
diff --git a/llvm/test/CodeGen/X86/load-combine.ll b/llvm/test/CodeGen/X86/load-combine.ll
index 5184e99d0180..1d08ee065315 100644
--- a/llvm/test/CodeGen/X86/load-combine.ll
+++ b/llvm/test/CodeGen/X86/load-combine.ll
@@ -1119,12 +1119,18 @@ define i32 @zext_load_i32_by_i8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movzwl (%eax), %eax
+; CHECK-NEXT: movzbl (%eax), %ecx
+; CHECK-NEXT: movzbl 1(%eax), %eax
+; CHECK-NEXT: shll $8, %eax
+; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: zext_load_i32_by_i8:
; CHECK64: # %bb.0:
-; CHECK64-NEXT: movzwl (%rdi), %eax
+; CHECK64-NEXT: movzbl (%rdi), %ecx
+; CHECK64-NEXT: movzbl 1(%rdi), %eax
+; CHECK64-NEXT: shll $8, %eax
+; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
@@ -1212,16 +1218,18 @@ define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movzwl (%eax), %eax
-; CHECK-NEXT: shll $16, %eax
-; CHECK-NEXT: bswapl %eax
+; CHECK-NEXT: movzbl 1(%eax), %ecx
+; CHECK-NEXT: movzbl (%eax), %eax
+; CHECK-NEXT: shll $8, %eax
+; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_bswap:
; CHECK64: # %bb.0:
-; CHECK64-NEXT: movzwl (%rdi), %eax
-; CHECK64-NEXT: shll $16, %eax
-; CHECK64-NEXT: bswapl %eax
+; CHECK64-NEXT: movzbl 1(%rdi), %ecx
+; CHECK64-NEXT: movzbl (%rdi), %eax
+; CHECK64-NEXT: shll $8, %eax
+; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
More information about the llvm-commits
mailing list