[PATCH] D10662: [x86] fix allowsMisalignedMemoryAccesses() implementation
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 12 14:37:47 PDT 2015
spatel updated this revision to Diff 31982.
spatel added a comment.
We now have a decent (if not perfect) TLI.allowsMemoryAccess() after r243549 (http://reviews.llvm.org/D10905). That makes this patch considerably simpler: change a load merging optimization to use the new hook and fix the 'fast' reporting for x86 misaligned 32-byte accesses.
Without the fix in allowsMisalignedMemoryAccesses(), we hit an infinite loop when targeting SandyBridge: LowerINSERT_SUBVECTOR() merges two 16-byte loads into a 32-byte load, and PerformLOADCombine() then splits it right back into two 16-byte loads.
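For reference, the pattern being merged is a pair of consecutive 16-byte loads feeding a shufflevector, roughly as sketched below. This is only a sketch modeled on the cases in test/CodeGen/X86/unaligned-32-byte-memops.ll; the function name, pointer arithmetic, and alignments are illustrative, not the exact test input.

  define <8 x float> @merge_16_byte_loads(<4 x float>* %ptr) {
    ; Two consecutive 16-byte loads; the first one is 32-byte aligned.
    %ptr2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 1
    %v1 = load <4 x float>, <4 x float>* %ptr, align 32
    %v2 = load <4 x float>, <4 x float>* %ptr2, align 16
    ; Concatenate the two 128-bit halves into an <8 x float>.
    %v3 = shufflevector <4 x float> %v1, <4 x float> %v2,
                        <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    ret <8 x float> %v3
  }

With this patch, a target where unaligned 32-byte accesses are slow (e.g. SandyBridge) keeps the two 16-byte loads plus vinsertf128 unless the merged access is reported fast or, as the updated CHECK lines below show, the first load is 32-byte aligned, in which case a single 32-byte vmovaps is emitted.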
http://reviews.llvm.org/D10662
Files:
lib/Target/X86/X86ISelLowering.cpp
test/CodeGen/X86/unaligned-32-byte-memops.ll
Index: test/CodeGen/X86/unaligned-32-byte-memops.ll
===================================================================
--- test/CodeGen/X86/unaligned-32-byte-memops.ll
+++ test/CodeGen/X86/unaligned-32-byte-memops.ll
@@ -75,12 +75,12 @@
ret <8 x float> %v3
}
+; If the first load is 32-byte aligned, then the loads should be merged in all cases.
+
define <8 x float> @combine_16_byte_loads_aligned(<4 x float>* %ptr) {
-;; FIXME: The first load is 32-byte aligned, so the second load should get merged.
; AVXSLOW-LABEL: combine_16_byte_loads_aligned:
; AVXSLOW: # BB#0:
-; AVXSLOW-NEXT: vmovaps 48(%rdi), %xmm0
-; AVXSLOW-NEXT: vinsertf128 $1, 64(%rdi), %ymm0, %ymm0
+; AVXSLOW-NEXT: vmovaps 48(%rdi), %ymm0
; AVXSLOW-NEXT: retq
;
; AVXFAST-LABEL: combine_16_byte_loads_aligned:
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -1903,8 +1903,12 @@
unsigned,
unsigned,
bool *Fast) const {
- if (Fast)
- *Fast = Subtarget->isUnalignedMemAccessFast();
+ if (Fast) {
+ if (VT.getSizeInBits() == 256)
+ *Fast = !Subtarget->isUnalignedMem32Slow();
+ else
+ *Fast = Subtarget->isUnalignedMemAccessFast();
+ }
return true;
}
@@ -11246,14 +11250,25 @@
// --> load32 addr
if ((IdxVal == OpVT.getVectorNumElements() / 2) &&
Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
- OpVT.is256BitVector() && SubVecVT.is128BitVector() &&
- !Subtarget->isUnalignedMem32Slow()) {
- SDValue SubVec2 = Vec.getOperand(1);
- if (auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2))) {
- if (Idx2->getZExtValue() == 0) {
- SDValue Ops[] = { SubVec2, SubVec };
- if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
- return Ld;
+ OpVT.is256BitVector() && SubVecVT.is128BitVector()) {
+ auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2));
+ if (Idx2 && Idx2->getZExtValue() == 0) {
+ SDValue SubVec2 = Vec.getOperand(1);
+ // If needed, look through a bitcast to get to the load.
+ if (SubVec2.getNode() && SubVec2.getOpcode() == ISD::BITCAST)
+ SubVec2 = SubVec2.getOperand(0);
+
+ if (auto *FirstLd = dyn_cast<LoadSDNode>(SubVec2)) {
+ bool Fast;
+ unsigned Alignment = FirstLd->getAlignment();
+ unsigned AS = FirstLd->getAddressSpace();
+ const X86TargetLowering *TLI = Subtarget->getTargetLowering();
+ if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+ OpVT, AS, Alignment, &Fast) && Fast) {
+ SDValue Ops[] = { SubVec2, SubVec };
+ if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
+ return Ld;
+ }
}
}
}