[llvm] r248368 - [X86][SSE] Replace 128-bit SSE41 PMOVSX intrinsics with native IR

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 23 01:48:34 PDT 2015


Author: rksimon
Date: Wed Sep 23 03:48:33 2015
New Revision: 248368

URL: http://llvm.org/viewvc/llvm-project?rev=248368&view=rev
Log:
[X86][SSE] Replace 128-bit SSE41 PMOVSX intrinsics with native IR

This patch removes the x86.sse41.pmovsx* intrinsics, provides a suitable upgrade path and updates relevant tests to sign extend a subvector instead.

LLVM counterpart to D12835

Differential Revision: http://reviews.llvm.org/D13002

Modified:
    llvm/trunk/lib/IR/AutoUpgrade.cpp
    llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
    llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
    llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
    llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
    llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll
    llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll
    llvm/trunk/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll
    llvm/trunk/test/CodeGen/X86/sse41.ll

Modified: llvm/trunk/lib/IR/AutoUpgrade.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUpgrade.cpp?rev=248368&r1=248367&r2=248368&view=diff
==============================================================================
--- llvm/trunk/lib/IR/AutoUpgrade.cpp (original)
+++ llvm/trunk/lib/IR/AutoUpgrade.cpp Wed Sep 23 03:48:33 2015
@@ -132,6 +132,7 @@ static bool UpgradeIntrinsicFunction1(Fu
         Name.startswith("x86.avx2.vbroadcast") ||
         Name.startswith("x86.avx2.pbroadcast") ||
         Name.startswith("x86.avx.vpermil.") ||
+        Name.startswith("x86.sse41.pmovsx") ||
         Name == "x86.avx.vinsertf128.pd.256" ||
         Name == "x86.avx.vinsertf128.ps.256" ||
         Name == "x86.avx.vinsertf128.si.256" ||
@@ -440,6 +441,19 @@ void llvm::UpgradeIntrinsicCall(CallInst
       for (unsigned I = 0; I < EltNum; ++I)
         Rep = Builder.CreateInsertElement(Rep, Load,
                                           ConstantInt::get(I32Ty, I));
+    } else if (Name.startswith("llvm.x86.sse41.pmovsx")) {
+      VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
+      VectorType *DstTy = cast<VectorType>(CI->getType());
+      unsigned NumDstElts = DstTy->getNumElements();
+
+      // Extract a subvector of the first NumDstElts lanes and sign extend.
+      SmallVector<int, 8> ShuffleMask;
+      for (int i = 0; i != (int)NumDstElts; ++i)
+        ShuffleMask.push_back(i);
+
+      Value *SV = Builder.CreateShuffleVector(
+          CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
+      Rep = Builder.CreateSExt(SV, DstTy);
     } else if (Name == "llvm.x86.avx2.vbroadcasti128") {
       // Replace vbroadcasts with a vector shuffle.
       Type *VT = VectorType::get(Type::getInt64Ty(C), 2);
@@ -527,10 +541,10 @@ void llvm::UpgradeIntrinsicCall(CallInst
       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
       VectorType *VecTy = cast<VectorType>(CI->getType());
       unsigned NumElts = VecTy->getNumElements();
-      
+
       // Mask off the high bits of the immediate value; hardware ignores those.
       Imm = Imm & 1;
-      
+
       // Extend the second operand into a vector that is twice as big.
       Value *UndefV = UndefValue::get(Op1->getType());
       SmallVector<Constant*, 8> Idxs;
@@ -572,7 +586,7 @@ void llvm::UpgradeIntrinsicCall(CallInst
       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
       VectorType *VecTy = cast<VectorType>(CI->getType());
       unsigned NumElts = VecTy->getNumElements();
-      
+
       // Mask off the high bits of the immediate value; hardware ignores those.
       Imm = Imm & 1;
 

Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=248368&r1=248367&r2=248368&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Wed Sep 23 03:48:33 2015
@@ -681,13 +681,13 @@ static const IntrinsicData  IntrinsicsWi
                      X86ISD::EXPAND, 0),
   X86_INTRINSIC_DATA(avx512_mask_expand_q_512,  COMPRESS_EXPAND_IN_REG,
                      X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_fpclass_pd_128, FPCLASS, X86ISD::VFPCLASS, 0), 
-  X86_INTRINSIC_DATA(avx512_mask_fpclass_pd_256, FPCLASS, X86ISD::VFPCLASS, 0), 
-  X86_INTRINSIC_DATA(avx512_mask_fpclass_pd_512, FPCLASS, X86ISD::VFPCLASS, 0), 
-  X86_INTRINSIC_DATA(avx512_mask_fpclass_ps_128, FPCLASS, X86ISD::VFPCLASS, 0), 
-  X86_INTRINSIC_DATA(avx512_mask_fpclass_ps_256, FPCLASS, X86ISD::VFPCLASS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_fpclass_pd_128, FPCLASS, X86ISD::VFPCLASS, 0), 
+  X86_INTRINSIC_DATA(avx512_mask_fpclass_pd_256, FPCLASS, X86ISD::VFPCLASS, 0), 
+  X86_INTRINSIC_DATA(avx512_mask_fpclass_pd_512, FPCLASS, X86ISD::VFPCLASS, 0), 
+  X86_INTRINSIC_DATA(avx512_mask_fpclass_ps_128, FPCLASS, X86ISD::VFPCLASS, 0), 
+  X86_INTRINSIC_DATA(avx512_mask_fpclass_ps_256, FPCLASS, X86ISD::VFPCLASS, 0),
   X86_INTRINSIC_DATA(avx512_mask_fpclass_ps_512, FPCLASS, X86ISD::VFPCLASS, 0), 
-  X86_INTRINSIC_DATA(avx512_mask_getexp_pd_128, INTR_TYPE_1OP_MASK_RM,
+  X86_INTRINSIC_DATA(avx512_mask_getexp_pd_128, INTR_TYPE_1OP_MASK_RM,
                      X86ISD::FGETEXP_RND, 0),
   X86_INTRINSIC_DATA(avx512_mask_getexp_pd_256, INTR_TYPE_1OP_MASK_RM,
                      X86ISD::FGETEXP_RND, 0),
@@ -1628,12 +1628,6 @@ static const IntrinsicData  IntrinsicsWi
   X86_INTRINSIC_DATA(sse41_pminsd,      INTR_TYPE_2OP, ISD::SMIN, 0),
   X86_INTRINSIC_DATA(sse41_pminud,      INTR_TYPE_2OP, ISD::UMIN, 0),
   X86_INTRINSIC_DATA(sse41_pminuw,      INTR_TYPE_2OP, ISD::UMIN, 0),
-  X86_INTRINSIC_DATA(sse41_pmovsxbd,    INTR_TYPE_1OP, X86ISD::VSEXT, 0),
-  X86_INTRINSIC_DATA(sse41_pmovsxbq,    INTR_TYPE_1OP, X86ISD::VSEXT, 0),
-  X86_INTRINSIC_DATA(sse41_pmovsxbw,    INTR_TYPE_1OP, X86ISD::VSEXT, 0),
-  X86_INTRINSIC_DATA(sse41_pmovsxdq,    INTR_TYPE_1OP, X86ISD::VSEXT, 0),
-  X86_INTRINSIC_DATA(sse41_pmovsxwd,    INTR_TYPE_1OP, X86ISD::VSEXT, 0),
-  X86_INTRINSIC_DATA(sse41_pmovsxwq,    INTR_TYPE_1OP, X86ISD::VSEXT, 0),
   X86_INTRINSIC_DATA(sse41_pmovzxbd,    INTR_TYPE_1OP, X86ISD::VZEXT, 0),
   X86_INTRINSIC_DATA(sse41_pmovzxbq,    INTR_TYPE_1OP, X86ISD::VZEXT, 0),
   X86_INTRINSIC_DATA(sse41_pmovzxbw,    INTR_TYPE_1OP, X86ISD::VZEXT, 0),

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=248368&r1=248367&r2=248368&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Wed Sep 23 03:48:33 2015
@@ -936,12 +936,6 @@ Instruction *InstCombiner::visitCallInst
     break;
   }
 
-  case Intrinsic::x86_sse41_pmovsxbd:
-  case Intrinsic::x86_sse41_pmovsxbq:
-  case Intrinsic::x86_sse41_pmovsxbw:
-  case Intrinsic::x86_sse41_pmovsxdq:
-  case Intrinsic::x86_sse41_pmovsxwd:
-  case Intrinsic::x86_sse41_pmovsxwq:
   case Intrinsic::x86_avx2_pmovsxbd:
   case Intrinsic::x86_avx2_pmovsxbq:
   case Intrinsic::x86_avx2_pmovsxbw:

Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll?rev=248368&r1=248367&r2=248368&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll Wed Sep 23 03:48:33 2015
@@ -143,3 +143,69 @@ define <8 x i16> @test_x86_sse41_pblendw
   ret <8 x i16> %res
 }
 declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
+; CHECK-LABEL: test_x86_sse41_pmovsxbd:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpmovsxbd %xmm0, %xmm0
+; CHECK-NEXT:    retl
+  %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
+; CHECK-LABEL: test_x86_sse41_pmovsxbq:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpmovsxbq %xmm0, %xmm0
+; CHECK-NEXT:    retl
+  %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
+; CHECK-LABEL: test_x86_sse41_pmovsxbw:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpmovsxbw %xmm0, %xmm0
+; CHECK-NEXT:    retl
+  %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
+; CHECK-LABEL: test_x86_sse41_pmovsxdq:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpmovsxdq %xmm0, %xmm0
+; CHECK-NEXT:    retl
+  %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
+; CHECK-LABEL: test_x86_sse41_pmovsxwd:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpmovsxwd %xmm0, %xmm0
+; CHECK-NEXT:    retl
+  %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
+; CHECK-LABEL: test_x86_sse41_pmovsxwq:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpmovsxwq %xmm0, %xmm0
+; CHECK-NEXT:    retl
+  %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone

Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll?rev=248368&r1=248367&r2=248368&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll Wed Sep 23 03:48:33 2015
@@ -1251,72 +1251,6 @@ define <8 x i16> @test_x86_sse41_pminuw(
 declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
 
 
-define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
-; CHECK-LABEL: test_x86_sse41_pmovsxbd:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovsxbd %xmm0, %xmm0
-; CHECK-NEXT:    retl
-  %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
-  ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
-; CHECK-LABEL: test_x86_sse41_pmovsxbq:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovsxbq %xmm0, %xmm0
-; CHECK-NEXT:    retl
-  %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
-
-
-define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
-; CHECK-LABEL: test_x86_sse41_pmovsxbw:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovsxbw %xmm0, %xmm0
-; CHECK-NEXT:    retl
-  %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
-  ret <8 x i16> %res
-}
-declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
-; CHECK-LABEL: test_x86_sse41_pmovsxdq:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovsxdq %xmm0, %xmm0
-; CHECK-NEXT:    retl
-  %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
-
-
-define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
-; CHECK-LABEL: test_x86_sse41_pmovsxwd:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovsxwd %xmm0, %xmm0
-; CHECK-NEXT:    retl
-  %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
-  ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
-; CHECK-LABEL: test_x86_sse41_pmovsxwq:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovsxwq %xmm0, %xmm0
-; CHECK-NEXT:    retl
-  %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
-
-
 define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
 ; CHECK-LABEL: test_x86_sse41_pmovzxbd:
 ; CHECK:       ## BB#0:
@@ -3378,7 +3312,7 @@ define void @movnt_dq(i8* %p, <2 x i64>
 ; CHECK-LABEL: movnt_dq:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    vpaddq LCPI282_0, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddq LCPI276_0, %xmm0, %xmm0
 ; CHECK-NEXT:    vmovntdq %ymm0, (%eax)
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retl

Modified: llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll?rev=248368&r1=248367&r2=248368&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll Wed Sep 23 03:48:33 2015
@@ -42,7 +42,6 @@ define <4 x float> @test_x86_sse41_inser
 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
 
 
-
 define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
   ; CHECK: mpsadbw
   %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<8 x i16>> [#uses=1]
@@ -59,3 +58,49 @@ define <8 x i16> @test_x86_sse41_pblendw
 declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone
 
 
+define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
+  ; CHECK: pmovsxbd
+  %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
+  ; CHECK: pmovsxbq
+  %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
+  ; CHECK: pmovsxbw
+  %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
+  ; CHECK: pmovsxdq
+  %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
+  ; CHECK: pmovsxwd
+  %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
+  ; CHECK: pmovsxwq
+  %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone

Modified: llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll?rev=248368&r1=248367&r2=248368&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll Wed Sep 23 03:48:33 2015
@@ -162,54 +162,6 @@ define <8 x i16> @test_x86_sse41_pminuw(
 declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
 
 
-define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
-  ; CHECK: pmovsxbd
-  %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
-  ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
-  ; CHECK: pmovsxbq
-  %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
-
-
-define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
-  ; CHECK: pmovsxbw
-  %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
-  ret <8 x i16> %res
-}
-declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
-  ; CHECK: pmovsxdq
-  %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
-
-
-define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
-  ; CHECK: pmovsxwd
-  %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
-  ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
-  ; CHECK: pmovsxwq
-  %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
-
-
 define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
   ; CHECK: pmovzxbd
   %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]

Modified: llvm/trunk/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll?rev=248368&r1=248367&r2=248368&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll Wed Sep 23 03:48:33 2015
@@ -6,8 +6,9 @@ define <8 x i16> @test_llvm_x86_sse41_pm
 ; SSE41: pmovsxbw (%rdi), %xmm0
 ; AVX:  vpmovsxbw (%rdi), %xmm0
   %1 = load <16 x i8>, <16 x i8>* %a, align 1
-  %2 = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %1)
-  ret <8 x i16> %2
+  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %3 = sext <8 x i8> %2 to <8 x i16>
+  ret <8 x i16> %3
 }
 
 define <4 x i32> @test_llvm_x86_sse41_pmovsxbd(<16 x i8>* %a) {
@@ -15,8 +16,9 @@ define <4 x i32> @test_llvm_x86_sse41_pm
 ; SSE41: pmovsxbd (%rdi), %xmm0
 ; AVX:  vpmovsxbd (%rdi), %xmm0
   %1 = load <16 x i8>, <16 x i8>* %a, align 1
-  %2 = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %1)
-  ret <4 x i32> %2
+  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %3 = sext <4 x i8> %2 to <4 x i32>
+  ret <4 x i32> %3
 }
 
 define <2 x i64> @test_llvm_x86_sse41_pmovsxbq(<16 x i8>* %a) {
@@ -24,8 +26,9 @@ define <2 x i64> @test_llvm_x86_sse41_pm
 ; SSE41: pmovsxbq (%rdi), %xmm0
 ; AVX:  vpmovsxbq (%rdi), %xmm0
   %1 = load <16 x i8>, <16 x i8>* %a, align 1
-  %2 = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %1)
-  ret <2 x i64> %2
+  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
+  %3 = sext <2 x i8> %2 to <2 x i64>
+  ret <2 x i64> %3
 }
 
 define <4 x i32> @test_llvm_x86_sse41_pmovsxwd(<8 x i16>* %a) {
@@ -33,8 +36,9 @@ define <4 x i32> @test_llvm_x86_sse41_pm
 ; SSE41: pmovsxwd (%rdi), %xmm0
 ; AVX:  vpmovsxwd (%rdi), %xmm0
   %1 = load <8 x i16>, <8 x i16>* %a, align 1
-  %2 = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1)
-  ret <4 x i32> %2
+  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %3 = sext <4 x i16> %2 to <4 x i32>
+  ret <4 x i32> %3
 }
 
 define <2 x i64> @test_llvm_x86_sse41_pmovsxwq(<8 x i16>* %a) {
@@ -42,8 +46,9 @@ define <2 x i64> @test_llvm_x86_sse41_pm
 ; SSE41: pmovsxwq (%rdi), %xmm0
 ; AVX:  vpmovsxwq (%rdi), %xmm0
   %1 = load <8 x i16>, <8 x i16>* %a, align 1
-  %2 = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %1)
-  ret <2 x i64> %2
+  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
+  %3 = sext <2 x i16> %2 to <2 x i64>
+  ret <2 x i64> %3
 }
 
 define <2 x i64> @test_llvm_x86_sse41_pmovsxdq(<4 x i32>* %a) {
@@ -51,8 +56,9 @@ define <2 x i64> @test_llvm_x86_sse41_pm
 ; SSE41: pmovsxdq (%rdi), %xmm0
 ; AVX:  vpmovsxdq (%rdi), %xmm0
   %1 = load <4 x i32>, <4 x i32>* %a, align 1
-  %2 = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %1)
-  ret <2 x i64> %2
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+  %3 = sext <2 x i32> %2 to <2 x i64>
+  ret <2 x i64> %3
 }
 
 define <8 x i16> @test_llvm_x86_sse41_pmovzxbw(<16 x i8>* %a) {
@@ -115,9 +121,3 @@ declare <4 x i32> @llvm.x86.sse41.pmovzx
 declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>)
 declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>)
 declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>)
-declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>)
-declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>)
-declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>)
-declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>)
-declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>)
-declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>)

Modified: llvm/trunk/test/CodeGen/X86/sse41.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41.ll?rev=248368&r1=248367&r2=248368&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41.ll Wed Sep 23 03:48:33 2015
@@ -31,49 +31,6 @@ define <16 x i8> @pinsrb_1(i8 %s, <16 x
   ret <16 x i8> %tmp1
 }
 
-define <2 x i64> @pmovsxbd_1(i32* %p) nounwind {
-; X32-LABEL: pmovsxbd_1:
-; X32:       ## BB#0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    pmovsxbd (%eax), %xmm0
-; X32-NEXT:    retl
-;
-; X64-LABEL: pmovsxbd_1:
-; X64:       ## BB#0: ## %entry
-; X64-NEXT:    pmovsxbd (%rdi), %xmm0
-; X64-NEXT:    retq
-entry:
-	%0 = load i32, i32* %p, align 4
-	%1 = insertelement <4 x i32> undef, i32 %0, i32 0
-	%2 = insertelement <4 x i32> %1, i32 0, i32 1
-	%3 = insertelement <4 x i32> %2, i32 0, i32 2
-	%4 = insertelement <4 x i32> %3, i32 0, i32 3
-	%5 = bitcast <4 x i32> %4 to <16 x i8>
-	%6 = tail call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %5) nounwind readnone
-	%7 = bitcast <4 x i32> %6 to <2 x i64>
-	ret <2 x i64> %7
-}
-
-define <2 x i64> @pmovsxwd_1(i64* %p) nounwind readonly {
-; X32-LABEL: pmovsxwd_1:
-; X32:       ## BB#0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    pmovsxwd (%eax), %xmm0
-; X32-NEXT:    retl
-;
-; X64-LABEL: pmovsxwd_1:
-; X64:       ## BB#0: ## %entry
-; X64-NEXT:    pmovsxwd (%rdi), %xmm0
-; X64-NEXT:    retq
-entry:
-	%0 = load i64, i64* %p		; <i64> [#uses=1]
-	%tmp2 = insertelement <2 x i64> zeroinitializer, i64 %0, i32 0		; <<2 x i64>> [#uses=1]
-	%1 = bitcast <2 x i64> %tmp2 to <8 x i16>		; <<8 x i16>> [#uses=1]
-	%2 = tail call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1) nounwind readnone		; <<4 x i32>> [#uses=1]
-	%3 = bitcast <4 x i32> %2 to <2 x i64>		; <<2 x i64>> [#uses=1]
-	ret <2 x i64> %3
-}
-
 define <2 x i64> @pmovzxbq_1() nounwind {
 ; X32-LABEL: pmovzxbq_1:
 ; X32:       ## BB#0: ## %entry
@@ -94,8 +51,6 @@ entry:
 	ret <2 x i64> %3
 }
 
-declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
-declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
 declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
 
 define i32 @extractps_1(<4 x float> %v) nounwind {
@@ -137,7 +92,7 @@ define float @ext_1(<4 x float> %v) noun
 ; X32:       ## BB#0:
 ; X32-NEXT:    pushl %eax
 ; X32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; X32-NEXT:    addss LCPI7_0, %xmm0
+; X32-NEXT:    addss LCPI5_0, %xmm0
 ; X32-NEXT:    movss %xmm0, (%esp)
 ; X32-NEXT:    flds (%esp)
 ; X32-NEXT:    popl %eax




More information about the llvm-commits mailing list