[llvm-commits] [llvm] r172269 - in /llvm/trunk: lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp test/CodeGen/PowerPC/vec_extload.ll
Nadav Rotem
nrotem at apple.com
Fri Jan 11 14:57:48 PST 2013
Author: nadav
Date: Fri Jan 11 16:57:48 2013
New Revision: 172269
URL: http://llvm.org/viewvc/llvm-project?rev=172269&view=rev
Log:
PPC: Implement efficient lowering of sign_extend_inreg.
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
llvm/trunk/test/CodeGen/PowerPC/vec_extload.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp?rev=172269&r1=172268&r2=172269&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp Fri Jan 11 16:57:48 2013
@@ -61,6 +61,8 @@
// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
// SINT_TO_FLOAT and SHR on vectors isn't legal.
SDValue ExpandUINT_TO_FLOAT(SDValue Op);
+ // Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
+ SDValue ExpandSEXTINREG(SDValue Op);
// Implement vselect in terms of XOR, AND, OR when blend is not supported
// by the target.
SDValue ExpandVSELECT(SDValue Op);
@@ -262,7 +264,9 @@
// FALL THROUGH
}
case TargetLowering::Expand:
- if (Node->getOpcode() == ISD::VSELECT)
+ if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG)
+ Result = ExpandSEXTINREG(Op);
+ else if (Node->getOpcode() == ISD::VSELECT)
Result = ExpandVSELECT(Op);
else if (Node->getOpcode() == ISD::SELECT)
Result = ExpandSELECT(Op);
@@ -501,6 +505,26 @@
return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
}
+SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
+ EVT VT = Op.getValueType();
+
+ // Make sure that the SRA and SRL instructions are available.
+ if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT();
+
+ unsigned BW = VT.getScalarType().getSizeInBits();
+ unsigned OrigBW = OrigTy.getScalarType().getSizeInBits();
+ SDValue ShiftSz = DAG.getConstant(BW - OrigBW, VT);
+
+ Op = Op.getOperand(0);
+ Op = DAG.getNode(ISD::SRL, DL, VT, Op, ShiftSz);
+ return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
+}
+
SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
// Implement VSELECT in terms of XOR, AND, OR
// on platforms which do not support blend natively.
Modified: llvm/trunk/test/CodeGen/PowerPC/vec_extload.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_extload.ll?rev=172269&r1=172268&r2=172269&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_extload.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_extload.ll Fri Jan 11 16:57:48 2013
@@ -15,55 +15,9 @@
ret <16 x i8> %c
}
; CHECK: v16si8_sext_in_reg:
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vsrb
+; CHECK: vsrab
+; CHECK: blr
; The zero extend uses a more clever logic: a vector splat
; and a logic and to set higher bits to 0.
@@ -83,31 +37,9 @@
ret <8 x i16> %c
}
; CHECK: v8si16_sext_in_reg:
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vsrh
+; CHECK: vsrah
+; CHECK: blr
; Same as v8si16_sext_in_reg, but instead of creating the mask
; with a splat, loads it from memory.
@@ -129,19 +61,9 @@
ret <4 x i32> %c
}
; CHECK: v4si32_sext_in_reg:
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lha
-; CHECK: stw
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lha
-; CHECK: stw
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lha
-; CHECK: stw
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lha
-; CHECK: stw
-; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vsrw
+; CHECK: vsraw
+; CHECK: blr
; Same as v8si16_sext_in_reg.
define <4 x i32> @v4si32_zext_in_reg(<4 x i32> %a) {
More information about the llvm-commits
mailing list