[llvm] r272510 - [X86] Remove sse2 pshufd/pshuflw/pshufhw intrinsics and upgrade them to shufflevector.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sun Jun 12 07:11:32 PDT 2016


Author: ctopper
Date: Sun Jun 12 09:11:32 2016
New Revision: 272510

URL: http://llvm.org/viewvc/llvm-project?rev=272510&view=rev
Log:
[X86] Remove sse2 pshufd/pshuflw/pshufhw intrinsics and upgrade them to shufflevector.

Modified:
    llvm/trunk/include/llvm/IR/IntrinsicsX86.td
    llvm/trunk/lib/IR/AutoUpgrade.cpp
    llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
    llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
    llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll

Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=272510&r1=272509&r2=272510&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Sun Jun 12 09:11:32 2016
@@ -662,15 +662,6 @@ let TargetPrefix = "x86" in {  // All in
   def int_x86_ssse3_pshuf_b_128     : GCCBuiltin<"__builtin_ia32_pshufb128">,
               Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
                          llvm_v16i8_ty], [IntrNoMem]>;
-  def int_x86_sse2_pshuf_d          :
-              Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i8_ty],
-                         [IntrNoMem]>;
-  def int_x86_sse2_pshufl_w         :
-              Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i8_ty],
-                         [IntrNoMem]>;
-  def int_x86_sse2_pshufh_w         :
-              Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i8_ty],
-                         [IntrNoMem]>;
   def int_x86_sse_pshuf_w           : GCCBuiltin<"__builtin_ia32_pshufw">,
               Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i8_ty],
                          [IntrNoMem]>;

Modified: llvm/trunk/lib/IR/AutoUpgrade.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUpgrade.cpp?rev=272510&r1=272509&r2=272510&view=diff
==============================================================================
--- llvm/trunk/lib/IR/AutoUpgrade.cpp (original)
+++ llvm/trunk/lib/IR/AutoUpgrade.cpp Sun Jun 12 09:11:32 2016
@@ -177,6 +177,7 @@ static bool UpgradeIntrinsicFunction1(Fu
         Name.startswith("x86.avx2.vbroadcast") ||
         Name.startswith("x86.avx2.pbroadcast") ||
         Name.startswith("x86.avx.vpermil.") ||
+        Name.startswith("x86.sse2.pshuf") ||
         Name.startswith("x86.sse41.pmovsx") ||
         Name.startswith("x86.sse41.pmovzx") ||
         Name.startswith("x86.avx2.pmovsx") ||
@@ -880,7 +881,8 @@ void llvm::UpgradeIntrinsicCall(CallInst
       Rep = Builder.CreateShuffleVector(Op0, UndefV, Idxs);
     } else if (Name == "llvm.stackprotectorcheck") {
       Rep = nullptr;
-    } else if (Name.startswith("llvm.x86.avx.vpermil.")) {
+    } else if (Name.startswith("llvm.x86.avx.vpermil.") ||
+               Name == "llvm.x86.sse2.pshuf.d") {
       Value *Op0 = CI->getArgOperand(0);
       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
       VectorType *VecTy = cast<VectorType>(CI->getType());
@@ -897,6 +899,34 @@ void llvm::UpgradeIntrinsicCall(CallInst
         Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
 
       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
+    } else if (Name == "llvm.x86.sse2.pshufl.w") {
+      Value *Op0 = CI->getArgOperand(0);
+      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+      unsigned NumElts = CI->getType()->getVectorNumElements();
+
+      SmallVector<uint32_t, 16> Idxs(NumElts);
+      for (unsigned l = 0; l != NumElts; l += 8) {
+        for (unsigned i = 0; i != 4; ++i)
+          Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
+        for (unsigned i = 4; i != 8; ++i)
+          Idxs[i + l] = i + l;
+      }
+
+      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
+    } else if (Name == "llvm.x86.sse2.pshufh.w") {
+      Value *Op0 = CI->getArgOperand(0);
+      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+      unsigned NumElts = CI->getType()->getVectorNumElements();
+
+      SmallVector<uint32_t, 16> Idxs(NumElts);
+      for (unsigned l = 0; l != NumElts; l += 8) {
+        for (unsigned i = 0; i != 4; ++i)
+          Idxs[i + l] = i + l;
+        for (unsigned i = 0; i != 4; ++i)
+          Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
+      }
+
+      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
     } else {
       llvm_unreachable("Unknown function for CallInst upgrade.");
     }

Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=272510&r1=272509&r2=272510&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Sun Jun 12 09:11:32 2016
@@ -2133,9 +2133,6 @@ static const IntrinsicData  IntrinsicsWi
   X86_INTRINSIC_DATA(sse2_pmulhu_w,     INTR_TYPE_2OP, ISD::MULHU, 0),
   X86_INTRINSIC_DATA(sse2_pmulu_dq,     INTR_TYPE_2OP, X86ISD::PMULUDQ, 0),
   X86_INTRINSIC_DATA(sse2_psad_bw,      INTR_TYPE_2OP, X86ISD::PSADBW, 0),
-  X86_INTRINSIC_DATA(sse2_pshuf_d,      INTR_TYPE_2OP, X86ISD::PSHUFD, 0),
-  X86_INTRINSIC_DATA(sse2_pshufh_w,     INTR_TYPE_2OP, X86ISD::PSHUFHW, 0),
-  X86_INTRINSIC_DATA(sse2_pshufl_w,     INTR_TYPE_2OP, X86ISD::PSHUFLW, 0),
   X86_INTRINSIC_DATA(sse2_psll_d,       INTR_TYPE_2OP, X86ISD::VSHL, 0),
   X86_INTRINSIC_DATA(sse2_psll_q,       INTR_TYPE_2OP, X86ISD::VSHL, 0),
   X86_INTRINSIC_DATA(sse2_psll_w,       INTR_TYPE_2OP, X86ISD::VSHL, 0),

Modified: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll?rev=272510&r1=272509&r2=272510&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll Sun Jun 12 09:11:32 2016
@@ -138,4 +138,35 @@ define void @test_x86_sse2_storeu_pd(i8*
 }
 declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
 
+define <4 x i32> @test_x86_sse2_pshuf_d(<4 x i32> %a) {
+; CHECK-LABEL: test_x86_sse2_pshuf_d:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; CHECK-NEXT:    retl
+entry:
+   %res = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) nounwind readnone
+   ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8) nounwind readnone
+
+define <8 x i16> @test_x86_sse2_pshufl_w(<8 x i16> %a) {
+; CHECK-LABEL: test_x86_sse2_pshufl_w:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; CHECK-NEXT:    retl
+entry:
+   %res = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27) nounwind readnone
+   ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8) nounwind readnone
 
+define <8 x i16> @test_x86_sse2_pshufh_w(<8 x i16> %a) {
+; CHECK-LABEL: test_x86_sse2_pshufh_w:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; CHECK-NEXT:    retl
+entry:
+   %res = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27) nounwind readnone
+   ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8) nounwind readnone

Modified: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll?rev=272510&r1=272509&r2=272510&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll Sun Jun 12 09:11:32 2016
@@ -1266,51 +1266,3 @@ define void @test_x86_sse2_pause() {
   ret void
 }
 declare void @llvm.x86.sse2.pause() nounwind
-
-define <4 x i32> @test_x86_sse2_pshuf_d(<4 x i32> %a) {
-; SSE-LABEL: test_x86_sse2_pshuf_d:
-; SSE:       ## BB#0: ## %entry
-; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
-; SSE-NEXT:    retl
-;
-; KNL-LABEL: test_x86_sse2_pshuf_d:
-; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
-; KNL-NEXT:    retl
-entry:
-   %res = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) nounwind readnone
-   ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8) nounwind readnone
-
-define <8 x i16> @test_x86_sse2_pshufl_w(<8 x i16> %a) {
-; SSE-LABEL: test_x86_sse2_pshufl_w:
-; SSE:       ## BB#0: ## %entry
-; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; SSE-NEXT:    retl
-;
-; KNL-LABEL: test_x86_sse2_pshufl_w:
-; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; KNL-NEXT:    retl
-entry:
-   %res = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27) nounwind readnone
-   ret <8 x i16> %res
-}
-declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8) nounwind readnone
-
-define <8 x i16> @test_x86_sse2_pshufh_w(<8 x i16> %a) {
-; SSE-LABEL: test_x86_sse2_pshufh_w:
-; SSE:       ## BB#0: ## %entry
-; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
-; SSE-NEXT:    retl
-;
-; KNL-LABEL: test_x86_sse2_pshufh_w:
-; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
-; KNL-NEXT:    retl
-entry:
-   %res = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27) nounwind readnone
-   ret <8 x i16> %res
-}
-declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8) nounwind readnone

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll?rev=272510&r1=272509&r2=272510&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll Sun Jun 12 09:11:32 2016
@@ -96,10 +96,15 @@ define <4 x i32> @combine_pshufd6(<4 x i
 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: combine_pshufd6:
-; AVX:       # BB#0: # %entry
-; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; AVX-NEXT:    retq
+; AVX1-LABEL: combine_pshufd6:
+; AVX1:       # BB#0: # %entry
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: combine_pshufd6:
+; AVX2:       # BB#0: # %entry
+; AVX2-NEXT:    vbroadcastss %xmm0, %xmm0
+; AVX2-NEXT:    retq
 entry:
   %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 0)
   %c = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %b, i8 8)




More information about the llvm-commits mailing list