[llvm] r271789 - [InstCombine][MMX] Extend SimplifyDemandedUseBits MOVMSK support to MMX
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Jun 4 06:42:48 PDT 2016
Author: rksimon
Date: Sat Jun 4 08:42:46 2016
New Revision: 271789
URL: http://llvm.org/viewvc/llvm-project?rev=271789&view=rev
Log:
[InstCombine][MMX] Extend SimplifyDemandedUseBits MOVMSK support to MMX
Add the MMX implementation to the SimplifyDemandedUseBits SSE/AVX MOVMSK support added in D19614.
Requires a minor tweak as llvm.x86.mmx.pmovmskb takes an x86_mmx argument, so we have to be explicit about the implied v8i8 vector type.
Modified:
llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
llvm/trunk/test/Transforms/InstCombine/x86-movmsk.ll
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp?rev=271789&r1=271788&r2=271789&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp Sat Jun 4 08:42:46 2016
@@ -768,6 +768,7 @@ Value *InstCombiner::SimplifyDemandedUse
// TODO: Could compute known zero/one bits based on the input.
break;
}
+ case Intrinsic::x86_mmx_pmovmskb:
case Intrinsic::x86_sse_movmsk_ps:
case Intrinsic::x86_sse2_movmsk_pd:
case Intrinsic::x86_sse2_pmovmskb_128:
@@ -776,9 +777,14 @@ Value *InstCombiner::SimplifyDemandedUse
case Intrinsic::x86_avx2_pmovmskb: {
// MOVMSK copies the vector elements' sign bits to the low bits
// and zeros the high bits.
- auto Arg = II->getArgOperand(0);
- auto ArgType = cast<VectorType>(Arg->getType());
- unsigned ArgWidth = ArgType->getNumElements();
+ unsigned ArgWidth;
+ if (II->getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb) {
+ ArgWidth = 8; // Arg is x86_mmx, but treated as <8 x i8>.
+ } else {
+ auto Arg = II->getArgOperand(0);
+ auto ArgType = cast<VectorType>(Arg->getType());
+ ArgWidth = ArgType->getNumElements();
+ }
// If we don't need any of low bits then return zero,
// we know that DemandedMask is non-zero already.
Modified: llvm/trunk/test/Transforms/InstCombine/x86-movmsk.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-movmsk.ll?rev=271789&r1=271788&r2=271789&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-movmsk.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-movmsk.ll Sat Jun 4 08:42:46 2016
@@ -7,6 +7,16 @@ target datalayout = "e-m:e-i64:64-f80:12
; DemandedBits - MOVMSK zeros the upper bits of the result.
;
+define i32 @test_upper_x86_mmx_pmovmskb(x86_mmx %a0) {
+; CHECK-LABEL: @test_upper_x86_mmx_pmovmskb(
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0)
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0)
+ %2 = and i32 %1, 255
+ ret i32 %2
+}
+
define i32 @test_upper_x86_sse_movmsk_ps(<4 x float> %a0) {
; CHECK-LABEL: @test_upper_x86_sse_movmsk_ps(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
@@ -63,6 +73,15 @@ define i32 @test_upper_x86_avx_movmsk_pd
; DemandedBits - If we don't use the lower bits then we just return zero.
;
+define i32 @test_lower_x86_mmx_pmovmskb(x86_mmx %a0) {
+; CHECK-LABEL: @test_lower_x86_mmx_pmovmskb(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0)
+ %2 = and i32 %1, -256
+ ret i32 %2
+}
+
define i32 @test_lower_x86_sse_movmsk_ps(<4 x float> %a0) {
; CHECK-LABEL: @test_lower_x86_sse_movmsk_ps(
; CHECK-NEXT: ret i32 0
@@ -110,6 +129,7 @@ define i32 @test_lower_x86_avx_movmsk_pd
; llvm.x86.avx2.pmovmskb uses the whole of the 32-bit register.
+declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx)
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>)
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>)
More information about the llvm-commits mailing list.