[llvm] r245075 - [x86] fix allowsMisalignedMemoryAccess() implementation
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 14 10:53:40 PDT 2015
Author: spatel
Date: Fri Aug 14 12:53:40 2015
New Revision: 245075
URL: http://llvm.org/viewvc/llvm-project?rev=245075&view=rev
Log:
[x86] fix allowsMisalignedMemoryAccess() implementation
This patch fixes the x86 implementation of allowsMisalignedMemoryAccess() to correctly
return the 'Fast' output parameter for 32-byte accesses. To test that, an existing load
merging optimization is changed to use the TLI hook. This exposes a shortcoming in the
current logic and results in the regression test update. Changing other direct users of
the isUnalignedMem32Slow() x86 CPU attribute would be a follow-on patch.
Without the fix in allowsMisalignedMemoryAccesses(), we will infinite loop when targeting
SandyBridge because LowerINSERT_SUBVECTOR() creates 32-byte loads from two 16-byte loads
while PerformLOADCombine() splits them back into 16-byte loads.
Differential Revision: http://reviews.llvm.org/D10662
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/unaligned-32-byte-memops.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=245075&r1=245074&r2=245075&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Aug 14 12:53:40 2015
@@ -1915,8 +1915,14 @@ X86TargetLowering::allowsMisalignedMemor
unsigned,
unsigned,
bool *Fast) const {
- if (Fast)
- *Fast = Subtarget->isUnalignedMemAccessFast();
+ if (Fast) {
+ // FIXME: We should be checking 128-bit accesses separately from smaller
+ // accesses.
+ if (VT.getSizeInBits() == 256)
+ *Fast = !Subtarget->isUnalignedMem32Slow();
+ else
+ *Fast = Subtarget->isUnalignedMemAccessFast();
+ }
return true;
}
@@ -11259,14 +11265,25 @@ static SDValue LowerINSERT_SUBVECTOR(SDV
// --> load32 addr
if ((IdxVal == OpVT.getVectorNumElements() / 2) &&
Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
- OpVT.is256BitVector() && SubVecVT.is128BitVector() &&
- !Subtarget->isUnalignedMem32Slow()) {
- SDValue SubVec2 = Vec.getOperand(1);
- if (auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2))) {
- if (Idx2->getZExtValue() == 0) {
- SDValue Ops[] = { SubVec2, SubVec };
- if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
- return Ld;
+ OpVT.is256BitVector() && SubVecVT.is128BitVector()) {
+ auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2));
+ if (Idx2 && Idx2->getZExtValue() == 0) {
+ SDValue SubVec2 = Vec.getOperand(1);
+ // If needed, look through a bitcast to get to the load.
+ if (SubVec2.getNode() && SubVec2.getOpcode() == ISD::BITCAST)
+ SubVec2 = SubVec2.getOperand(0);
+
+ if (auto *FirstLd = dyn_cast<LoadSDNode>(SubVec2)) {
+ bool Fast;
+ unsigned Alignment = FirstLd->getAlignment();
+ unsigned AS = FirstLd->getAddressSpace();
+ const X86TargetLowering *TLI = Subtarget->getTargetLowering();
+ if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+ OpVT, AS, Alignment, &Fast) && Fast) {
+ SDValue Ops[] = { SubVec2, SubVec };
+ if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
+ return Ld;
+ }
}
}
}
Modified: llvm/trunk/test/CodeGen/X86/unaligned-32-byte-memops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/unaligned-32-byte-memops.ll?rev=245075&r1=245074&r2=245075&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/unaligned-32-byte-memops.ll (original)
+++ llvm/trunk/test/CodeGen/X86/unaligned-32-byte-memops.ll Fri Aug 14 12:53:40 2015
@@ -75,12 +75,12 @@ define <8 x float> @combine_16_byte_load
ret <8 x float> %v3
}
+; If the first load is 32-byte aligned, then the loads should be merged in all cases.
+
define <8 x float> @combine_16_byte_loads_aligned(<4 x float>* %ptr) {
-;; FIXME: The first load is 32-byte aligned, so the second load should get merged.
; AVXSLOW-LABEL: combine_16_byte_loads_aligned:
; AVXSLOW: # BB#0:
-; AVXSLOW-NEXT: vmovaps 48(%rdi), %xmm0
-; AVXSLOW-NEXT: vinsertf128 $1, 64(%rdi), %ymm0, %ymm0
+; AVXSLOW-NEXT: vmovaps 48(%rdi), %ymm0
; AVXSLOW-NEXT: retq
;
; AVXFAST-LABEL: combine_16_byte_loads_aligned:
More information about the llvm-commits
mailing list