[PATCH] D10662: [x86] fix allowsMisalignedMemoryAccess() implementation
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 14 10:54:28 PDT 2015
This revision was automatically updated to reflect the committed changes.
Closed by commit rL245075: [x86] fix allowsMisalignedMemoryAccess() implementation (authored by spatel).
Changed prior to commit:
http://reviews.llvm.org/D10662?vs=31982&id=32165#toc
Repository:
rL LLVM
http://reviews.llvm.org/D10662
Files:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/unaligned-32-byte-memops.ll
Index: llvm/trunk/test/CodeGen/X86/unaligned-32-byte-memops.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/unaligned-32-byte-memops.ll
+++ llvm/trunk/test/CodeGen/X86/unaligned-32-byte-memops.ll
@@ -75,12 +75,12 @@
ret <8 x float> %v3
}
+; If the first load is 32-byte aligned, then the loads should be merged in all cases.
+
define <8 x float> @combine_16_byte_loads_aligned(<4 x float>* %ptr) {
-;; FIXME: The first load is 32-byte aligned, so the second load should get merged.
; AVXSLOW-LABEL: combine_16_byte_loads_aligned:
; AVXSLOW: # BB#0:
-; AVXSLOW-NEXT: vmovaps 48(%rdi), %xmm0
-; AVXSLOW-NEXT: vinsertf128 $1, 64(%rdi), %ymm0, %ymm0
+; AVXSLOW-NEXT: vmovaps 48(%rdi), %ymm0
; AVXSLOW-NEXT: retq
;
; AVXFAST-LABEL: combine_16_byte_loads_aligned:
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -1915,8 +1915,14 @@
unsigned,
unsigned,
bool *Fast) const {
- if (Fast)
- *Fast = Subtarget->isUnalignedMemAccessFast();
+ if (Fast) {
+ // FIXME: We should be checking 128-bit accesses separately from smaller
+ // accesses.
+ if (VT.getSizeInBits() == 256)
+ *Fast = !Subtarget->isUnalignedMem32Slow();
+ else
+ *Fast = Subtarget->isUnalignedMemAccessFast();
+ }
return true;
}
@@ -11259,14 +11265,25 @@
// --> load32 addr
if ((IdxVal == OpVT.getVectorNumElements() / 2) &&
Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
- OpVT.is256BitVector() && SubVecVT.is128BitVector() &&
- !Subtarget->isUnalignedMem32Slow()) {
- SDValue SubVec2 = Vec.getOperand(1);
- if (auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2))) {
- if (Idx2->getZExtValue() == 0) {
- SDValue Ops[] = { SubVec2, SubVec };
- if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
- return Ld;
+ OpVT.is256BitVector() && SubVecVT.is128BitVector()) {
+ auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2));
+ if (Idx2 && Idx2->getZExtValue() == 0) {
+ SDValue SubVec2 = Vec.getOperand(1);
+ // If needed, look through a bitcast to get to the load.
+ if (SubVec2.getNode() && SubVec2.getOpcode() == ISD::BITCAST)
+ SubVec2 = SubVec2.getOperand(0);
+
+ if (auto *FirstLd = dyn_cast<LoadSDNode>(SubVec2)) {
+ bool Fast;
+ unsigned Alignment = FirstLd->getAlignment();
+ unsigned AS = FirstLd->getAddressSpace();
+ const X86TargetLowering *TLI = Subtarget->getTargetLowering();
+ if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+ OpVT, AS, Alignment, &Fast) && Fast) {
+ SDValue Ops[] = { SubVec2, SubVec };
+ if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
+ return Ld;
+ }
}
}
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D10662.32165.patch
Type: text/x-patch
Size: 3183 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150814/74ac2137/attachment.bin>
More information about the llvm-commits mailing list