[llvm-commits] [llvm] r106038 - in /llvm/trunk: lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp test/CodeGen/Generic/v-binop-widen.ll test/CodeGen/Generic/v-binop-widen2.ll
Mon P Wang
wangmp at apple.com
Tue Jun 15 13:29:05 PDT 2010
Author: wangmp
Date: Tue Jun 15 15:29:05 2010
New Revision: 106038
URL: http://llvm.org/viewvc/llvm-project?rev=106038&view=rev
Log:
Fixed vector widening of binary instructions that can trap. Patch by Visa Putkinen!
Added:
llvm/trunk/test/CodeGen/Generic/v-binop-widen.ll
llvm/trunk/test/CodeGen/Generic/v-binop-widen2.ll
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp?rev=106038&r1=106037&r2=106038&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp Tue Jun 15 15:29:05 2010
@@ -1271,7 +1271,7 @@
EVT WidenEltVT = WidenVT.getVectorElementType();
EVT VT = WidenVT;
unsigned NumElts = VT.getVectorNumElements();
- while (!TLI.isTypeLegal(VT) && NumElts != 1) {
+ while (!TLI.isTypeSynthesizable(VT) && NumElts != 1) {
NumElts = NumElts / 2;
VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
}
@@ -1286,13 +1286,20 @@
return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
} else {
// Since the operation can trap, apply operation on the original vector.
+ EVT MaxVT = VT;
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
SmallVector<SDValue, 16> ConcatOps(CurNumElts);
unsigned ConcatEnd = 0; // Current ConcatOps index.
- unsigned Idx = 0; // Current Idx into input vectors.
+ int Idx = 0; // Current Idx into input vectors.
+
+ // NumElts := greatest synthesizable vector size (at most WidenVT)
+ // while (orig. vector has unhandled elements) {
+ // take munches of size NumElts from the beginning and add to ConcatOps
+ // NumElts := next smaller supported vector size or 1
+ // }
while (CurNumElts != 0) {
while (CurNumElts >= NumElts) {
SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
@@ -1303,26 +1310,21 @@
Idx += NumElts;
CurNumElts -= NumElts;
}
- EVT PrevVecVT = VT;
do {
NumElts = NumElts / 2;
VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
- } while (!TLI.isTypeLegal(VT) && NumElts != 1);
+ } while (!TLI.isTypeSynthesizable(VT) && NumElts != 1);
if (NumElts == 1) {
- // Since we are using concat vector, build a vector from the scalar ops.
- SDValue VecOp = DAG.getUNDEF(PrevVecVT);
for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
InOp1, DAG.getIntPtrConstant(Idx));
SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
InOp2, DAG.getIntPtrConstant(Idx));
- VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, PrevVecVT, VecOp,
- DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2),
- DAG.getIntPtrConstant(i));
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
+ EOp1, EOp2);
}
CurNumElts = 0;
- ConcatOps[ConcatEnd++] = VecOp;
}
}
@@ -1333,23 +1335,65 @@
return ConcatOps[0];
}
- // Rebuild vector to one with the widen type
- Idx = ConcatEnd - 1;
- while (Idx != 0) {
+ // while (Some element of ConcatOps is not of type MaxVT) {
+ // From the end of ConcatOps, collect elements of the same type and put
+ // them into an op of the next larger supported type
+ // }
+ while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
+ Idx = ConcatEnd - 1;
VT = ConcatOps[Idx--].getValueType();
- while (Idx != 0 && ConcatOps[Idx].getValueType() == VT)
- --Idx;
- if (Idx != 0) {
- VT = ConcatOps[Idx].getValueType();
- ConcatOps[Idx+1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
- &ConcatOps[Idx+1], ConcatEnd - Idx - 1);
+ while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
+ Idx--;
+
+ int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
+ EVT NextVT;
+ do {
+ NextSize *= 2;
+ NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
+ } while (!TLI.isTypeSynthesizable(NextVT));
+
+ if (!VT.isVector()) {
+ // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT
+ SDValue VecOp = DAG.getUNDEF(NextVT);
+ unsigned NumToInsert = ConcatEnd - Idx - 1;
+ for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp,
+ ConcatOps[OpIdx], DAG.getIntPtrConstant(i));
+ }
+ ConcatOps[Idx+1] = VecOp;
ConcatEnd = Idx + 2;
+ }
+ else {
+ // Vector type, create a CONCAT_VECTORS of type NextVT
+ SDValue undefVec = DAG.getUNDEF(VT);
+ unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
+ SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
+ unsigned RealVals = ConcatEnd - Idx - 1;
+ unsigned SubConcatEnd = 0;
+ unsigned SubConcatIdx = Idx + 1;
+ while (SubConcatEnd < RealVals)
+ SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
+ while (SubConcatEnd < OpsToConcat)
+ SubConcatOps[SubConcatEnd++] = undefVec;
+ ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ NextVT, &SubConcatOps[0],
+ OpsToConcat);
+ ConcatEnd = SubConcatIdx + 1;
}
}
+
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
- unsigned NumOps = WidenVT.getVectorNumElements()/VT.getVectorNumElements();
+ // add undefs of size MaxVT until ConcatOps grows to length of WidenVT
+ unsigned NumOps =
+ WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
if (NumOps != ConcatEnd ) {
- SDValue UndefVal = DAG.getUNDEF(VT);
+ SDValue UndefVal = DAG.getUNDEF(MaxVT);
for (unsigned j = ConcatEnd; j < NumOps; ++j)
ConcatOps[j] = UndefVal;
}
@@ -1379,7 +1423,7 @@
return DAG.getNode(Opcode, dl, WidenVT, InOp);
}
- if (TLI.isTypeLegal(InWidenVT)) {
+ if (TLI.isTypeSynthesizable(InWidenVT)) {
// Because the result and the input are different vector types, widening
// the result could create a legal type but widening the input might make
// it an illegal type that might lead to repeatedly splitting the input
@@ -1521,7 +1565,7 @@
NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
}
- if (TLI.isTypeLegal(NewInVT)) {
+ if (TLI.isTypeSynthesizable(NewInVT)) {
// Because the result and the input are different vector types, widening
// the result could create a legal type but widening the input might make
// it an illegal type that might lead to repeatedly splitting the input
@@ -1662,7 +1706,7 @@
SatOp, CvtCode);
}
- if (TLI.isTypeLegal(InWidenVT)) {
+ if (TLI.isTypeSynthesizable(InWidenVT)) {
// Because the result and the input are different vector types, widening
// the result could create a legal type but widening the input might make
// it an illegal type that might lead to repeatedly splitting the input
@@ -1988,7 +2032,7 @@
if (InWidenSize % Size == 0 && !VT.isVector()) {
unsigned NewNumElts = InWidenSize / Size;
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts);
- if (TLI.isTypeLegal(NewVT)) {
+ if (TLI.isTypeSynthesizable(NewVT)) {
SDValue BitOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, InOp);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
DAG.getIntPtrConstant(0));
@@ -2086,7 +2130,7 @@
unsigned MemVTWidth = MemVT.getSizeInBits();
if (MemVT.getSizeInBits() <= WidenEltWidth)
break;
- if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
+ if (TLI.isTypeSynthesizable(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
(MemVTWidth <= Width ||
(Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
RetVT = MemVT;
@@ -2100,7 +2144,7 @@
VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
EVT MemVT = (MVT::SimpleValueType) VT;
unsigned MemVTWidth = MemVT.getSizeInBits();
- if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
+ if (TLI.isTypeSynthesizable(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
(WidenWidth % MemVTWidth) == 0 &&
(MemVTWidth <= Width ||
(Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
Added: llvm/trunk/test/CodeGen/Generic/v-binop-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Generic/v-binop-widen.ll?rev=106038&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Generic/v-binop-widen.ll (added)
+++ llvm/trunk/test/CodeGen/Generic/v-binop-widen.ll Tue Jun 15 15:29:05 2010
@@ -0,0 +1,8 @@
+; RUN: llc -march=x86 %s
+
+%vec = type <9 x float>
+define %vec @vecdiv( %vec %p1, %vec %p2)
+{
+ %result = fdiv %vec %p1, %p2
+ ret %vec %result
+}
Added: llvm/trunk/test/CodeGen/Generic/v-binop-widen2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Generic/v-binop-widen2.ll?rev=106038&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Generic/v-binop-widen2.ll (added)
+++ llvm/trunk/test/CodeGen/Generic/v-binop-widen2.ll Tue Jun 15 15:29:05 2010
@@ -0,0 +1,37 @@
+; RUN: llvm-as < %s | lli
+
+%vec = type <6 x float>
+
+define %vec @vecdiv( %vec %p1, %vec %p2)
+{
+ %result = fdiv %vec %p1, %p2
+ ret %vec %result
+}
+
+ at a = constant %vec < float 2.0, float 4.0, float 8.0, float 16.0, float 32.0, float 64.0 >
+ at b = constant %vec < float 2.0, float 2.0, float 2.0, float 2.0, float 2.0, float 2.0 >
+
+; Expected result: < 1.0, 2.0, 4.0, ..., 2.0^(n-1) >
+; main() returns 0 if the result is expected and 1 otherwise
+define i32 @main() nounwind {
+entry:
+ %avec = load %vec* @a
+ %bvec = load %vec* @b
+
+ %res = call %vec @vecdiv(%vec %avec, %vec %bvec)
+ br label %loop
+loop:
+ %idx = phi i32 [0, %entry], [%nextInd, %looptail]
+ %expected = phi float [1.0, %entry], [%nextExpected, %looptail]
+ %elem = extractelement %vec %res, i32 %idx
+ %expcmp = fcmp oeq float %elem, %expected
+ br i1 %expcmp, label %looptail, label %return
+looptail:
+ %nextExpected = fmul float %expected, 2.0
+ %nextInd = add i32 %idx, 1
+ %cmp = icmp slt i32 %nextInd, 6
+ br i1 %cmp, label %loop, label %return
+return:
+ %retval = phi i32 [0, %looptail], [1, %loop]
+ ret i32 %retval
+}
More information about the llvm-commits
mailing list