[PATCH] Optimize sext 4xi8,4xi16 to 4xi64
Muhammad Tauqir Ahmad
muhammad.t.ahmad at intel.com
Tue Mar 5 13:22:35 PST 2013
Update the cost model. Update the cost tests for 4xi8,4xi16 -> 4xi64.
Also, to answer Nadav's question on how I verified the code:
Assuming you were asking about how I verified the assembly sequence produced, I asked Elena whether this new sequence is good and she said yes. Also verified using the instruction manual. Didn't actually execute any code on hardware.
Hi nadav,
http://llvm-reviews.chandlerc.com/D491
CHANGE SINCE LAST DIFF
http://llvm-reviews.chandlerc.com/D491?vs=1186&id=1191#toc
Files:
lib/Target/X86/X86ISelLowering.cpp
lib/Target/X86/X86TargetTransformInfo.cpp
test/Analysis/CostModel/X86/cast.ll
test/CodeGen/X86/avx-sext.ll
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -11815,8 +11815,23 @@
// fall through
case MVT::v4i32:
case MVT::v8i16: {
- SDValue Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT,
- Op.getOperand(0), ShAmt, DAG);
+ // (sext (vzext x)) -> (vsext x)
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op00 = Op0.getOperand(0);
+ SDValue Tmp1;
+ // Hopefully, this VECTOR_SHUFFLE is just a VZEXT.
+ if (Op0.getOpcode() == ISD::BITCAST &&
+ Op00.getOpcode() == ISD::VECTOR_SHUFFLE)
+ Tmp1 = LowerVectorIntExtend(Op00, DAG);
+ if (Tmp1.getNode()) {
+ SDValue Tmp1Op0 = Tmp1.getOperand(0);
+ assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
+ "This optimization is invalid without a VZEXT.");
+ return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
+ }
+
+ // If the above didn't work, then just use Shift-Left + Shift-Right.
+ Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT, Op0, ShAmt, DAG);
return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, Tmp1, ShAmt, DAG);
}
}
Index: lib/Target/X86/X86TargetTransformInfo.cpp
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.cpp
+++ lib/Target/X86/X86TargetTransformInfo.cpp
@@ -257,8 +257,8 @@
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 },
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 8 },
- { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 8 },
- { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 8 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 },
{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 },
};
Index: test/Analysis/CostModel/X86/cast.ll
===================================================================
--- test/Analysis/CostModel/X86/cast.ll
+++ test/Analysis/CostModel/X86/cast.ll
@@ -44,9 +44,9 @@
%B = zext <8 x i16> undef to <8 x i32>
;CHECK: cost of 1 {{.*}} sext
%C = sext <4 x i32> undef to <4 x i64>
- ;CHECK: cost of 8 {{.*}} sext
+ ;CHECK: cost of 6 {{.*}} sext
%C1 = sext <4 x i8> undef to <4 x i64>
- ;CHECK: cost of 8 {{.*}} sext
+ ;CHECK: cost of 6 {{.*}} sext
%C2 = sext <4 x i16> undef to <4 x i64>
;CHECK: cost of 1 {{.*}} zext
Index: test/CodeGen/X86/avx-sext.ll
===================================================================
--- test/CodeGen/X86/avx-sext.ll
+++ test/CodeGen/X86/avx-sext.ll
@@ -165,3 +165,24 @@
ret <4 x i64> %extmask
}
+; AVX: sext_4i8_to_4i64
+; AVX: vpmovsxbd
+; AVX: vpmovsxdq
+; AVX: vpmovsxdq
+; AVX: ret
+define <4 x i64> @load_sext_4i8_to_4i64(<4 x i8> *%ptr) {
+ %X = load <4 x i8>* %ptr
+ %Y = sext <4 x i8> %X to <4 x i64>
+ ret <4 x i64>%Y
+}
+
+; AVX: sext_4i16_to_4i64
+; AVX: vpmovsxwd
+; AVX: vpmovsxdq
+; AVX: vpmovsxdq
+; AVX: ret
+define <4 x i64> @load_sext_4i16_to_4i64(<4 x i16> *%ptr) {
+ %X = load <4 x i16>* %ptr
+ %Y = sext <4 x i16> %X to <4 x i64>
+ ret <4 x i64>%Y
+}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D491.3.patch
Type: text/x-patch
Size: 3235 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130305/40ba452c/attachment.bin>
More information about the llvm-commits
mailing list