[llvm-commits] [llvm] r136691 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/avx-256-splat.ll
Bruno Cardoso Lopes
bruno.cardoso at gmail.com
Tue Aug 2 09:06:18 PDT 2011
Author: bruno
Date: Tue Aug 2 11:06:18 2011
New Revision: 136691
URL: http://llvm.org/viewvc/llvm-project?rev=136691&view=rev
Log:
Make this kind of lowering supported by 256-bit instructions:
shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
To:
shuffle (vload ptr), undef, <1, 1, 1, 1>
Fix PR10494
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx-256-splat.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=136691&r1=136690&r2=136691&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Aug 2 11:06:18 2011
@@ -4566,42 +4566,52 @@
return SDValue();
}
+ // FIXME: 256-bit vector instructions don't require a strict alignment,
+ // improve this code to support it better.
+ unsigned RequiredAlign = VT.getSizeInBits()/8;
SDValue Chain = LD->getChain();
- // Make sure the stack object alignment is at least 16.
+ // Make sure the stack object alignment is at least 16 or 32.
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- if (DAG.InferPtrAlignment(Ptr) < 16) {
+ if (DAG.InferPtrAlignment(Ptr) < RequiredAlign) {
if (MFI->isFixedObjectIndex(FI)) {
// Can't change the alignment. FIXME: It's possible to compute
// the exact stack offset and reference FI + adjust offset instead.
// If someone *really* cares about this. That's the way to implement it.
return SDValue();
} else {
- MFI->setObjectAlignment(FI, 16);
+ MFI->setObjectAlignment(FI, RequiredAlign);
}
}
- // (Offset % 16) must be multiple of 4. Then address is then
+ // (Offset % 16 or 32) must be multiple of 4. Then address is then
// Ptr + (Offset & ~15).
if (Offset < 0)
return SDValue();
- if ((Offset % 16) & 3)
+ if ((Offset % RequiredAlign) & 3)
return SDValue();
- int64_t StartOffset = Offset & ~15;
+ int64_t StartOffset = Offset & ~(RequiredAlign-1);
if (StartOffset)
Ptr = DAG.getNode(ISD::ADD, Ptr.getDebugLoc(), Ptr.getValueType(),
Ptr,DAG.getConstant(StartOffset, Ptr.getValueType()));
int EltNo = (Offset - StartOffset) >> 2;
- int Mask[4] = { EltNo, EltNo, EltNo, EltNo };
- EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32;
- SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,
+ int NumElems = VT.getVectorNumElements();
+
+ EVT CanonVT = VT.getSizeInBits() == 128 ? MVT::v4i32 : MVT::v8i32;
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
+ SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(StartOffset),
false, false, 0);
- // Canonicalize it to a v4i32 shuffle.
- V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
- return DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getVectorShuffle(MVT::v4i32, dl, V1,
- DAG.getUNDEF(MVT::v4i32),&Mask[0]));
+
+ // Canonicalize it to a v4i32 or v8i32 shuffle.
+ SmallVector<int, 8> Mask;
+ for (int i = 0; i < NumElems; ++i)
+ Mask.push_back(EltNo);
+
+ V1 = DAG.getNode(ISD::BITCAST, dl, CanonVT, V1);
+ return DAG.getNode(ISD::BITCAST, dl, NVT,
+ DAG.getVectorShuffle(CanonVT, dl, V1,
+ DAG.getUNDEF(CanonVT),&Mask[0]));
}
return SDValue();
Modified: llvm/trunk/test/CodeGen/X86/avx-256-splat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-256-splat.ll?rev=136691&r1=136690&r2=136691&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-256-splat.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-256-splat.ll Tue Aug 2 11:06:18 2011
@@ -45,3 +45,35 @@
%vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
ret <4 x double> %vecinit6.i
}
+
+; Test this simple opt:
+; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
+; To:
+; shuffle (vload ptr)), undef, <1, 1, 1, 1>
+; CHECK: vmovaps
+; CHECK-NEXT: vpextrd
+define void @funcE() nounwind {
+allocas:
+ %udx495 = alloca [18 x [18 x float]], align 32
+ br label %for_test505.preheader
+
+for_test505.preheader: ; preds = %for_test505.preheader, %allocas
+ br i1 undef, label %for_exit499, label %for_test505.preheader
+
+for_exit499: ; preds = %for_test505.preheader
+ br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247
+
+load.i1247: ; preds = %for_exit499
+ %ptr1227 = getelementptr [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
+ %ptr.i1237 = bitcast float* %ptr1227 to i32*
+ %val.i1238 = load i32* %ptr.i1237, align 4
+ %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
+ %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
+ %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
+ br label %__load_and_broadcast_32.exit1249
+
+__load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_exit499
+ %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
+ ret void
+}
+
More information about the llvm-commits
mailing list