[PATCH] [InstCombine][x86] Constant fold pslld.
Michael Spencer
bigcheesegs at gmail.com
Fri Apr 11 11:24:34 PDT 2014
Hi grosbach,
The pslld instruction is almost a <4 x i32> shl; however, it has the defined
behavior of evaluating to 0 for shift counts greater than 31. We can’t currently
represent this directly in LLVM without generating extra code, but we can
handle the constant case.
http://reviews.llvm.org/D3359
Files:
lib/Transforms/InstCombine/InstCombineCalls.cpp
test/Transforms/InstCombine/vec_demanded_elts.ll
Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -554,6 +554,21 @@
break;
}
+ case Intrinsic::x86_sse2_pslli_d: {
+ // Simplify if count is constant. If > 31, 0, otherwise to shl.
+ if (auto Count = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
+ if (Count->getZExtValue() > 31)
+ return ReplaceInstUsesWith(
+ CI, ConstantAggregateZero::get(II->getArgOperand(0)->getType()));
+ else {
+ unsigned VWidth =
+ cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
+ return BinaryOperator::CreateShl(
+ II->getArgOperand(0), Builder->CreateVectorSplat(VWidth, Count));
+ }
+ }
+ break;
+ }
case Intrinsic::x86_sse41_pmovsxbw:
case Intrinsic::x86_sse41_pmovsxwd:
Index: test/Transforms/InstCombine/vec_demanded_elts.ll
===================================================================
--- test/Transforms/InstCombine/vec_demanded_elts.ll
+++ test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -209,4 +209,22 @@
ret <4 x float> %ret
}
+define <4 x i32> @test_pslli_s() nounwind {
+entry:
+; Constant fold.
+; CHECK: test_pslli_s
+; CHECK: <i32 0, i32 2, i32 4, i32 6>
+ %0 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 1)
+ ret <4 x i32> %0
+}
+
+define <4 x i32> @test_pslli_l() nounwind {
+entry:
+; Constant 0
+; CHECK: test_pslli_l
+; CHECK: zeroinitializer
+ %0 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 32)
+ ret <4 x i32> %0
+}
+declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32)
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D3359.1.patch
Type: text/x-patch
Size: 1791 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140411/d58ef46d/attachment.bin>
More information about the llvm-commits
mailing list