[llvm] f1bbf3a - Revert "[DAGCombiner] allow load/store merging if pairs can be rotated into place"
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 13 05:56:15 PDT 2020
Author: Sanjay Patel
Date: 2020-07-13T08:55:29-04:00
New Revision: f1bbf3acb42a7447c170b8248e310d8a61443377
URL: https://github.com/llvm/llvm-project/commit/f1bbf3acb42a7447c170b8248e310d8a61443377
DIFF: https://github.com/llvm/llvm-project/commit/f1bbf3acb42a7447c170b8248e310d8a61443377.diff
LOG: Revert "[DAGCombiner] allow load/store merging if pairs can be rotated into place"
This reverts commit 591a3af5c7acc05617c0eacf6ae4f76bd8a9a6ce.
The commit message was cut off and failed to include the review citation.
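
For context, the reverted combine let the store merger handle an adjacent load/store pair whose halves arrive in reversed order by merging it into one wide load, a rotate by half the width, and one wide store. A reduced sketch of the source pattern, reconstructed from the truncated test bodies below (value names are illustrative):

define void @rotate32_in_place(i16* %p) {
  %p0 = getelementptr i16, i16* %p, i64 0
  %p1 = getelementptr i16, i16* %p, i64 1
  %lo = load i16, i16* %p0, align 2
  %hi = load i16, i16* %p1, align 2
  store i16 %hi, i16* %p0, align 2
  store i16 %lo, i16* %p1, align 2
  ret void
}

With the combine in place, x86 compiled this to a single roll $16, (%rdi); after this revert it goes back to two narrow loads and two narrow stores, as the updated CHECK lines show.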
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AArch64/merge-store-dependency.ll
llvm/test/CodeGen/X86/stores-merging.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index dd601bd5ca7e..42e6e12f3f02 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16541,27 +16541,14 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
}
while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
+ // If we have load/store pair instructions and we only have two values,
+ // don't bother merging.
Align RequiredAlignment;
- bool NeedRotate = false;
- if (LoadNodes.size() == 2) {
- // If we have load/store pair instructions and we only have two values,
- // don't bother merging.
- if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
- StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
- LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
- break;
- }
- // If the loads are reversed, see if we can rotate the halves into place.
- int64_t Offset0 = LoadNodes[0].OffsetFromBase;
- int64_t Offset1 = LoadNodes[1].OffsetFromBase;
- EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
- if (Offset0 - Offset1 == ElementSizeBytes &&
- (hasOperation(ISD::ROTL, PairVT) ||
- hasOperation(ISD::ROTR, PairVT))) {
- std::swap(LoadNodes[0], LoadNodes[1]);
- NeedRotate = true;
- }
+ if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
+ StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
+ LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
+ break;
}
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
@@ -16726,18 +16713,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
NewLoad = DAG.getLoad(
JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
- SDValue StoreOp = NewLoad;
- if (NeedRotate) {
- unsigned LoadWidth = ElementSizeBytes * 8 * 2;
- assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
- "Unexpected type for rotate-able load pair");
- SDValue RotAmt =
- DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
- // Target can convert to the identical ROTR if it does not have ROTL.
- StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
- }
NewStore = DAG.getStore(
- NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
+ NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
} else { // This must be the truncstore/extload case
EVT ExtendedTy =
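
For reference, the code removed above produced the merged form roughly as follows (a sketch in IR notation; the actual transform builds SelectionDAG nodes, and ISD::ROTL corresponds to the funnel-shift intrinsic with equal operands):

  %wide = load i32, i32* %p, align 2
  %rot = call i32 @llvm.fshl.i32(i32 %wide, i32 %wide, i32 16)
  store i32 %rot, i32* %q, align 2

The rotate amount is half the joint width (LoadWidth / 2), and a rotate-left by half the width is identical to a rotate-right by the same amount, which is why a target lacking ROTL can legalize to ROTR, as the removed comment notes.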
diff --git a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
index 6850846fec06..77b7012d2ed1 100644
--- a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
+++ b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
@@ -95,8 +95,6 @@ exit:
ret void
}
-; TODO: rev16?
-
define void @rotate16_in_place(i8* %p) {
; A53-LABEL: rotate16_in_place:
; A53: // %bb.0:
@@ -114,8 +112,6 @@ define void @rotate16_in_place(i8* %p) {
ret void
}
-; TODO: rev16?
-
define void @rotate16(i8* %p, i8* %q) {
; A53-LABEL: rotate16:
; A53: // %bb.0:
@@ -138,9 +134,10 @@ define void @rotate16(i8* %p, i8* %q) {
define void @rotate32_in_place(i16* %p) {
; A53-LABEL: rotate32_in_place:
; A53: // %bb.0:
-; A53-NEXT: ldr w8, [x0]
-; A53-NEXT: ror w8, w8, #16
-; A53-NEXT: str w8, [x0]
+; A53-NEXT: ldrh w8, [x0, #2]
+; A53-NEXT: ldrh w9, [x0]
+; A53-NEXT: strh w8, [x0]
+; A53-NEXT: strh w9, [x0, #2]
; A53-NEXT: ret
%p0 = getelementptr i16, i16* %p, i64 0
%p1 = getelementptr i16, i16* %p, i64 1
@@ -154,9 +151,10 @@ define void @rotate32_in_place(i16* %p) {
define void @rotate32(i16* %p) {
; A53-LABEL: rotate32:
; A53: // %bb.0:
-; A53-NEXT: ldr w8, [x0]
-; A53-NEXT: ror w8, w8, #16
-; A53-NEXT: str w8, [x0, #84]
+; A53-NEXT: ldrh w8, [x0, #2]
+; A53-NEXT: ldrh w9, [x0]
+; A53-NEXT: strh w8, [x0, #84]
+; A53-NEXT: strh w9, [x0, #86]
; A53-NEXT: ret
%p0 = getelementptr i16, i16* %p, i64 0
%p1 = getelementptr i16, i16* %p, i64 1
@@ -169,8 +167,6 @@ define void @rotate32(i16* %p) {
ret void
}
-; Prefer paired memops over rotate.
-
define void @rotate64_in_place(i32* %p) {
; A53-LABEL: rotate64_in_place:
; A53: // %bb.0:
@@ -186,8 +182,6 @@ define void @rotate64_in_place(i32* %p) {
ret void
}
-; Prefer paired memops over rotate.
-
define void @rotate64(i32* %p) {
; A53-LABEL: rotate64:
; A53: // %bb.0:
diff --git a/llvm/test/CodeGen/X86/stores-merging.ll b/llvm/test/CodeGen/X86/stores-merging.ll
index 60fd01eac096..768684067f32 100644
--- a/llvm/test/CodeGen/X86/stores-merging.ll
+++ b/llvm/test/CodeGen/X86/stores-merging.ll
@@ -246,7 +246,10 @@ define void @pr43446_1(i8* %a) {
define void @rotate16_in_place(i8* %p) {
; CHECK-LABEL: rotate16_in_place:
; CHECK: # %bb.0:
-; CHECK-NEXT: rolw $8, (%rdi)
+; CHECK-NEXT: movb (%rdi), %al
+; CHECK-NEXT: movb 1(%rdi), %cl
+; CHECK-NEXT: movb %cl, (%rdi)
+; CHECK-NEXT: movb %al, 1(%rdi)
; CHECK-NEXT: retq
%p0 = getelementptr i8, i8* %p, i64 0
%p1 = getelementptr i8, i8* %p, i64 1
@@ -260,9 +263,10 @@ define void @rotate16_in_place(i8* %p) {
define void @rotate16(i8* %p, i8* %q) {
; CHECK-LABEL: rotate16:
; CHECK: # %bb.0:
-; CHECK-NEXT: movzwl (%rdi), %eax
-; CHECK-NEXT: rolw $8, %ax
-; CHECK-NEXT: movw %ax, (%rsi)
+; CHECK-NEXT: movb (%rdi), %al
+; CHECK-NEXT: movb 1(%rdi), %cl
+; CHECK-NEXT: movb %cl, (%rsi)
+; CHECK-NEXT: movb %al, 1(%rsi)
; CHECK-NEXT: retq
%p0 = getelementptr i8, i8* %p, i64 0
%p1 = getelementptr i8, i8* %p, i64 1
@@ -278,7 +282,10 @@ define void @rotate16(i8* %p, i8* %q) {
define void @rotate32_in_place(i16* %p) {
; CHECK-LABEL: rotate32_in_place:
; CHECK: # %bb.0:
-; CHECK-NEXT: roll $16, (%rdi)
+; CHECK-NEXT: movzwl (%rdi), %eax
+; CHECK-NEXT: movzwl 2(%rdi), %ecx
+; CHECK-NEXT: movw %cx, (%rdi)
+; CHECK-NEXT: movw %ax, 2(%rdi)
; CHECK-NEXT: retq
%p0 = getelementptr i16, i16* %p, i64 0
%p1 = getelementptr i16, i16* %p, i64 1
@@ -292,9 +299,10 @@ define void @rotate32_in_place(i16* %p) {
define void @rotate32(i16* %p) {
; CHECK-LABEL: rotate32:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl (%rdi), %eax
-; CHECK-NEXT: roll $16, %eax
-; CHECK-NEXT: movl %eax, 84(%rdi)
+; CHECK-NEXT: movzwl (%rdi), %eax
+; CHECK-NEXT: movzwl 2(%rdi), %ecx
+; CHECK-NEXT: movw %cx, 84(%rdi)
+; CHECK-NEXT: movw %ax, 86(%rdi)
; CHECK-NEXT: retq
%p0 = getelementptr i16, i16* %p, i64 0
%p1 = getelementptr i16, i16* %p, i64 1
@@ -310,7 +318,10 @@ define void @rotate32(i16* %p) {
define void @rotate64_in_place(i32* %p) {
; CHECK-LABEL: rotate64_in_place:
; CHECK: # %bb.0:
-; CHECK-NEXT: rolq $32, (%rdi)
+; CHECK-NEXT: movl (%rdi), %eax
+; CHECK-NEXT: movl 4(%rdi), %ecx
+; CHECK-NEXT: movl %ecx, (%rdi)
+; CHECK-NEXT: movl %eax, 4(%rdi)
; CHECK-NEXT: retq
%p0 = getelementptr i32, i32* %p, i64 0
%p1 = getelementptr i32, i32* %p, i64 1
@@ -324,9 +335,10 @@ define void @rotate64_in_place(i32* %p) {
define void @rotate64(i32* %p) {
; CHECK-LABEL: rotate64:
; CHECK: # %bb.0:
-; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: rolq $32, %rax
-; CHECK-NEXT: movq %rax, 8(%rdi)
+; CHECK-NEXT: movl (%rdi), %eax
+; CHECK-NEXT: movl 4(%rdi), %ecx
+; CHECK-NEXT: movl %ecx, 8(%rdi)
+; CHECK-NEXT: movl %eax, 12(%rdi)
; CHECK-NEXT: retq
%p0 = getelementptr i32, i32* %p, i64 0
%p1 = getelementptr i32, i32* %p, i64 1
@@ -342,9 +354,10 @@ define void @rotate64(i32* %p) {
define void @rotate64_iterate(i16* %p) {
; CHECK-LABEL: rotate64_iterate:
; CHECK: # %bb.0:
-; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: rolq $32, %rax
-; CHECK-NEXT: movq %rax, 84(%rdi)
+; CHECK-NEXT: movl (%rdi), %eax
+; CHECK-NEXT: movl 4(%rdi), %ecx
+; CHECK-NEXT: movl %ecx, 84(%rdi)
+; CHECK-NEXT: movl %eax, 88(%rdi)
; CHECK-NEXT: retq
%p0 = getelementptr i16, i16* %p, i64 0
%p1 = getelementptr i16, i16* %p, i64 1
@@ -365,8 +378,6 @@ define void @rotate64_iterate(i16* %p) {
ret void
}
-; TODO: recognize this as 2 rotates?
-
define void @rotate32_consecutive(i16* %p) {
; CHECK-LABEL: rotate32_consecutive:
; CHECK: # %bb.0:
@@ -398,17 +409,17 @@ define void @rotate32_consecutive(i16* %p) {
ret void
}
-; Same as above, but now the stores are not all consecutive.
-
define void @rotate32_twice(i16* %p) {
; CHECK-LABEL: rotate32_twice:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl (%rdi), %eax
-; CHECK-NEXT: movl 4(%rdi), %ecx
-; CHECK-NEXT: roll $16, %eax
-; CHECK-NEXT: roll $16, %ecx
-; CHECK-NEXT: movl %eax, 84(%rdi)
-; CHECK-NEXT: movl %ecx, 108(%rdi)
+; CHECK-NEXT: movzwl (%rdi), %eax
+; CHECK-NEXT: movzwl 2(%rdi), %ecx
+; CHECK-NEXT: movzwl 4(%rdi), %edx
+; CHECK-NEXT: movzwl 6(%rdi), %esi
+; CHECK-NEXT: movw %cx, 84(%rdi)
+; CHECK-NEXT: movw %ax, 86(%rdi)
+; CHECK-NEXT: movw %si, 108(%rdi)
+; CHECK-NEXT: movw %dx, 110(%rdi)
; CHECK-NEXT: retq
%p0 = getelementptr i16, i16* %p, i64 0
%p1 = getelementptr i16, i16* %p, i64 1