[llvm-commits] [llvm] r159991 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/2012-07-10-extload64.ll test/CodeGen/X86/vec_compare-2.ll test/CodeGen/X86/widen_load-0.ll
Nadav Rotem
nadav.rotem at intel.com
Tue Jul 10 06:25:08 PDT 2012
Author: nadav
Date: Tue Jul 10 08:25:08 2012
New Revision: 159991
URL: http://llvm.org/viewvc/llvm-project?rev=159991&view=rev
Log:
Improve the loading of load-anyext vectors by allowing the codegen to load
multiple scalars and insert them into a vector. Next, we shuffle the elements
into the correct places, as before.
Also fix a small dagcombine bug in SimplifyBinOpWithSameOpcodeHands, when the
migration of bitcasts happened too late in the SelectionDAG process.
Added:
llvm/trunk/test/CodeGen/X86/2012-07-10-extload64.ll
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vec_compare-2.ll
llvm/trunk/test/CodeGen/X86/widen_load-0.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=159991&r1=159990&r2=159991&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Jul 10 08:25:08 2012
@@ -2340,7 +2340,7 @@
// We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
// on scalars.
if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR)
- && Level == AfterLegalizeVectorOps) {
+ && Level == AfterLegalizeTypes) {
SDValue In0 = N0.getOperand(0);
SDValue In1 = N1.getOperand(0);
EVT In0Ty = In0.getValueType();
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=159991&r1=159990&r2=159991&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Jul 10 08:25:08 2012
@@ -14425,7 +14425,8 @@
/// PerformLOADCombine - Do target-specific dag combines on LOAD nodes.
static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
LoadSDNode *Ld = cast<LoadSDNode>(N);
EVT RegVT = Ld->getValueType(0);
EVT MemVT = Ld->getMemoryVT();
@@ -14447,47 +14448,73 @@
unsigned RegSz = RegVT.getSizeInBits();
unsigned MemSz = MemVT.getSizeInBits();
assert(RegSz > MemSz && "Register size must be greater than the mem size");
- // All sizes must be a power of two
- if (!isPowerOf2_32(RegSz * MemSz * NumElems)) return SDValue();
- // Attempt to load the original value using a single load op.
- // Find a scalar type which is equal to the loaded word size.
+ // All sizes must be a power of two.
+ if (!isPowerOf2_32(RegSz * MemSz * NumElems))
+ return SDValue();
+
+ // Attempt to load the original value using scalar loads.
+ // Find the largest scalar type that divides the total loaded size.
MVT SclrLoadTy = MVT::i8;
for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
MVT Tp = (MVT::SimpleValueType)tp;
- if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() == MemSz) {
+ if (TLI.isTypeLegal(Tp) && ((MemSz % Tp.getSizeInBits()) == 0)) {
SclrLoadTy = Tp;
- break;
}
}
- // Proceed if a load word is found.
- if (SclrLoadTy.getSizeInBits() != MemSz) return SDValue();
+ // Calculate the number of scalar loads that we need to perform
+ // in order to load our vector from memory.
+ unsigned NumLoads = MemSz / SclrLoadTy.getSizeInBits();
+ // Represent our vector as a sequence of elements which are the
+ // largest scalar that we can load.
EVT LoadUnitVecVT = EVT::getVectorVT(*DAG.getContext(), SclrLoadTy,
RegSz/SclrLoadTy.getSizeInBits());
+ // Represent the data using the same element type that is stored in
+ // memory. In practice, we ''widen'' MemVT.
EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
RegSz/MemVT.getScalarType().getSizeInBits());
- // Can't shuffle using an illegal type.
- if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
- // Perform a single load.
- SDValue ScalarLoad = DAG.getLoad(SclrLoadTy, dl, Ld->getChain(),
- Ld->getBasePtr(),
- Ld->getPointerInfo(), Ld->isVolatile(),
- Ld->isNonTemporal(), Ld->isInvariant(),
- Ld->getAlignment());
+ assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
+ "Invalid vector type");
+
+ // We can't shuffle using an illegal type.
+ if (!TLI.isTypeLegal(WideVecVT))
+ return SDValue();
+
+ SmallVector<SDValue, 8> Chains;
+ SDValue Ptr = Ld->getBasePtr();
+ SDValue Increment = DAG.getConstant(SclrLoadTy.getSizeInBits()/8,
+ TLI.getPointerTy());
+ SDValue Res = DAG.getUNDEF(LoadUnitVecVT);
- // Insert the word loaded into a vector.
- SDValue ScalarInVector = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
- LoadUnitVecVT, ScalarLoad);
+ for (unsigned i = 0; i < NumLoads; ++i) {
+ // Perform a single load.
+ SDValue ScalarLoad = DAG.getLoad(SclrLoadTy, dl, Ld->getChain(),
+ Ptr, Ld->getPointerInfo(),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->isInvariant(), Ld->getAlignment());
+ Chains.push_back(ScalarLoad.getValue(1));
+ // Create the first element type using SCALAR_TO_VECTOR in order to avoid
+ // another round of DAGCombining.
+ if (i == 0)
+ Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoadUnitVecVT, ScalarLoad);
+ else
+ Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, LoadUnitVecVT, Res,
+ ScalarLoad, DAG.getIntPtrConstant(i));
+
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ }
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0],
+ Chains.size());
// Bitcast the loaded value to a vector of the original element type, in
// the size of the target vector type.
- SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT,
- ScalarInVector);
+ SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Res);
unsigned SizeRatio = RegSz/MemSz;
// Redistribute the loaded elements into the different locations.
@@ -14503,8 +14530,7 @@
Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
// Replace the original load with the new sequence
// and return the new chain.
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Shuff);
- return SDValue(ScalarLoad.getNode(), 1);
+ return DCI.CombineTo(N, Shuff, TF, true);
}
return SDValue();
@@ -14574,8 +14600,9 @@
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
- // Can't shuffle using an illegal type
- if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
+ // Can't shuffle using an illegal type.
+ if (!TLI.isTypeLegal(WideVecVT))
+ return SDValue();
SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
DAG.getUNDEF(WideVecVT),
@@ -15308,7 +15335,7 @@
case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget);
case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
case ISD::XOR: return PerformXorCombine(N, DAG, DCI, Subtarget);
- case ISD::LOAD: return PerformLOADCombine(N, DAG, Subtarget);
+ case ISD::LOAD: return PerformLOADCombine(N, DAG, DCI, Subtarget);
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
case ISD::UINT_TO_FP: return PerformUINT_TO_FPCombine(N, DAG);
case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
Added: llvm/trunk/test/CodeGen/X86/2012-07-10-extload64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2012-07-10-extload64.ll?rev=159991&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2012-07-10-extload64.ll (added)
+++ llvm/trunk/test/CodeGen/X86/2012-07-10-extload64.ll Tue Jul 10 08:25:08 2012
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7 -mtriple=i686-pc-win32 | FileCheck %s
+
+; CHECK: load_store
+define void @load_store(<4 x i16>* %in) {
+entry:
+ %A27 = load <4 x i16>* %in, align 4
+ %A28 = add <4 x i16> %A27, %A27
+ store <4 x i16> %A28, <4 x i16>* %in, align 4
+ ret void
+; CHECK: movd
+; CHECK: pinsrd
+; CHECK: ret
+}
Modified: llvm/trunk/test/CodeGen/X86/vec_compare-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_compare-2.ll?rev=159991&r1=159990&r2=159991&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_compare-2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_compare-2.ll Tue Jul 10 08:25:08 2012
@@ -10,8 +10,7 @@
entry:
; CHECK: cfi_def_cfa_offset
; CHECK-NOT: set
-; CHECK: movzwl
-; CHECK: movzwl
+; CHECK: punpcklwd
; CHECK: pshufd
; CHECK: pshufb
%shr.i = ashr <4 x i32> zeroinitializer, <i32 3, i32 3, i32 3, i32 3> ; <<4 x i32>> [#uses=1]
Modified: llvm/trunk/test/CodeGen/X86/widen_load-0.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_load-0.ll?rev=159991&r1=159990&r2=159991&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_load-0.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_load-0.ll Tue Jul 10 08:25:08 2012
@@ -1,18 +1,12 @@
; RUN: llc < %s -o - -mtriple=x86_64-linux -mcpu=corei7 | FileCheck %s
-; RUN: llc < %s -o - -mtriple=x86_64-win32 -mcpu=corei7 | FileCheck %s -check-prefix=WIN64
; PR4891
; Both loads should happen before either store.
-; CHECK: movd ({{.*}}), {{.*}}
-; CHECK: movd ({{.*}}), {{.*}}
-; CHECK: movd {{.*}}, ({{.*}})
-; CHECK: movd {{.*}}, ({{.*}})
-
-; WIN64: movd ({{.*}}), {{.*}}
-; WIN64: movd ({{.*}}), {{.*}}
-; WIN64: movd {{.*}}, ({{.*}})
-; WIN64: movd {{.*}}, ({{.*}})
+; CHECK: movl ({{.*}}), {{.*}}
+; CHECK: movl ({{.*}}), {{.*}}
+; CHECK: movl {{.*}}, ({{.*}})
+; CHECK: movl {{.*}}, ({{.*}})
define void @short2_int_swap(<2 x i16>* nocapture %b, i32* nocapture %c) nounwind {
entry:
More information about the llvm-commits
mailing list