[llvm] r232120 - [X86, AVX2] Replace inserti128 and extracti128 intrinsics with generic shuffles

Sanjay Patel spatel at rotateright.com
Thu Mar 12 16:16:18 PDT 2015


Author: spatel
Date: Thu Mar 12 18:16:18 2015
New Revision: 232120

URL: http://llvm.org/viewvc/llvm-project?rev=232120&view=rev
Log:
[X86, AVX2] Replace inserti128 and extracti128 intrinsics with generic shuffles

This should complete the job started in r231794 and continued in r232045:
We want to replace as much custom x86 shuffling via intrinsics
as possible because pushing the code down the generic shuffle
optimization path allows for better codegen and less complexity
in LLVM.

AVX2 introduced proper integer variants of the hacked integer insert/extract
C intrinsics that were created for this same functionality with AVX1.

This should complete the removal of insert/extract128 intrinsics.

The Clang precursor patch for this change was checked in at r232109.


Modified:
    llvm/trunk/include/llvm/IR/IntrinsicsX86.td
    llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
    llvm/trunk/lib/IR/AutoUpgrade.cpp
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll
    llvm/trunk/test/CodeGen/X86/avx2-intrinsics-x86.ll

Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=232120&r1=232119&r2=232120&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Thu Mar 12 18:16:18 2015
@@ -1759,13 +1759,6 @@ let TargetPrefix = "x86" in {  // All in
 
 // Vector extract and insert
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_avx2_vextracti128 : GCCBuiltin<"__builtin_ia32_extract128i256">,
-              Intrinsic<[llvm_v2i64_ty], [llvm_v4i64_ty,
-                         llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx2_vinserti128 : GCCBuiltin<"__builtin_ia32_insert128i256">,
-              Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
-                         llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
-
   def int_x86_avx512_mask_vextractf32x4_512 :
       GCCBuiltin<"__builtin_ia32_extractf32x4_mask">,
                  Intrinsic<[llvm_v4f32_ty], [llvm_v16f32_ty, llvm_i8_ty,

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=232120&r1=232119&r2=232120&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Thu Mar 12 18:16:18 2015
@@ -4966,28 +4966,6 @@ SelectionDAGBuilder::visitIntrinsicCall(
     setValue(&I, Res);
     return nullptr;
   }
-  case Intrinsic::x86_avx2_vinserti128: {
-    EVT DestVT = TLI.getValueType(I.getType());
-    EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType());
-    uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) *
-                   ElVT.getVectorNumElements();
-    Res =
-        DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT,
-                    getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)),
-                    DAG.getConstant(Idx, TLI.getVectorIdxTy()));
-    setValue(&I, Res);
-    return nullptr;
-  }
-  case Intrinsic::x86_avx2_vextracti128: {
-    EVT DestVT = TLI.getValueType(I.getType());
-    uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) *
-                   DestVT.getVectorNumElements();
-    Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, DestVT,
-                      getValue(I.getArgOperand(0)),
-                      DAG.getConstant(Idx, TLI.getVectorIdxTy()));
-    setValue(&I, Res);
-    return nullptr;
-  }
   case Intrinsic::convertff:
   case Intrinsic::convertfsi:
   case Intrinsic::convertfui:

Modified: llvm/trunk/lib/IR/AutoUpgrade.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUpgrade.cpp?rev=232120&r1=232119&r2=232120&view=diff
==============================================================================
--- llvm/trunk/lib/IR/AutoUpgrade.cpp (original)
+++ llvm/trunk/lib/IR/AutoUpgrade.cpp Thu Mar 12 18:16:18 2015
@@ -161,9 +161,11 @@ static bool UpgradeIntrinsicFunction1(Fu
         Name == "x86.avx.vinsertf128.pd.256" ||
         Name == "x86.avx.vinsertf128.ps.256" ||
         Name == "x86.avx.vinsertf128.si.256" ||
+        Name == "x86.avx2.vinserti128" ||
         Name == "x86.avx.vextractf128.pd.256" ||
         Name == "x86.avx.vextractf128.ps.256" ||
         Name == "x86.avx.vextractf128.si.256" ||
+        Name == "x86.avx2.vextracti128" ||
         Name == "x86.avx.movnt.dq.256" ||
         Name == "x86.avx.movnt.pd.256" ||
         Name == "x86.avx.movnt.ps.256" ||
@@ -634,7 +636,8 @@ void llvm::UpgradeIntrinsicCall(CallInst
       Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
     } else if (Name == "llvm.x86.avx.vinsertf128.pd.256" ||
                Name == "llvm.x86.avx.vinsertf128.ps.256" ||
-               Name == "llvm.x86.avx.vinsertf128.si.256") {
+               Name == "llvm.x86.avx.vinsertf128.si.256" ||
+               Name == "llvm.x86.avx2.vinserti128") {
       Value *Op0 = CI->getArgOperand(0);
       Value *Op1 = CI->getArgOperand(1);
       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
@@ -679,7 +682,8 @@ void llvm::UpgradeIntrinsicCall(CallInst
       Rep = Builder.CreateShuffleVector(Op0, Rep, ConstantVector::get(Idxs2));
     } else if (Name == "llvm.x86.avx.vextractf128.pd.256" ||
                Name == "llvm.x86.avx.vextractf128.ps.256" ||
-               Name == "llvm.x86.avx.vextractf128.si.256") {
+               Name == "llvm.x86.avx.vextractf128.si.256" ||
+               Name == "llvm.x86.avx2.vextracti128") {
       Value *Op0 = CI->getArgOperand(0);
       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
       VectorType *VecTy = cast<VectorType>(CI->getType());

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=232120&r1=232119&r2=232120&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Thu Mar 12 18:16:18 2015
@@ -8595,9 +8595,7 @@ def : Pat<(vinsert128_insert:$ins (v16i1
 //
 def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst),
           (ins VR256:$src1, u8imm:$src2),
-          "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-          [(set VR128:$dst,
-            (int_x86_avx2_vextracti128 VR256:$src1, imm:$src2))]>,
+          "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
           Sched<[WriteShuffle256]>, VEX, VEX_L;
 let hasSideEffects = 0, mayStore = 1 in
 def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),

Modified: llvm/trunk/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll?rev=232120&r1=232119&r2=232120&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll Thu Mar 12 18:16:18 2015
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=core-avx2 -mattr=avx2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mattr=avx2 | FileCheck %s
 
 define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
   ; CHECK: vpblendw
@@ -62,3 +62,24 @@ define <4 x i64> @test_x86_avx2_psrl_dq(
   ret <4 x i64> %res
 }
 declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_avx2_vextracti128(<4 x i64> %a0) {
+; CHECK-LABEL:    test_x86_avx2_vextracti128:
+; CHECK:          vextracti128
+
+  %res = call <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64> %a0, i8 7)
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64>, i8) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_vinserti128(<4 x i64> %a0, <2 x i64> %a1) {
+; CHECK-LABEL:    test_x86_avx2_vinserti128:
+; CHECK:          vinserti128
+
+  %res = call <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64> %a0, <2 x i64> %a1, i8 7)
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone
+

Modified: llvm/trunk/test/CodeGen/X86/avx2-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-intrinsics-x86.ll?rev=232120&r1=232119&r2=232120&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-intrinsics-x86.ll Thu Mar 12 18:16:18 2015
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=core-avx2 -mattr=avx2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mattr=avx2 | FileCheck %s
 
 define <16 x i16> @test_x86_avx2_packssdw(<8 x i32> %a0, <8 x i32> %a1) {
   ; CHECK: vpackssdw
@@ -775,22 +775,6 @@ define <4 x i64> @test_x86_avx2_vperm2i1
 declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readonly
 
 
-define <2 x i64> @test_x86_avx2_vextracti128(<4 x i64> %a0) {
-  ; CHECK: vextracti128
-  %res = call <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64> %a0, i8 7) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64>, i8) nounwind readnone
-
-
-define <4 x i64> @test_x86_avx2_vinserti128(<4 x i64> %a0, <2 x i64> %a1) {
-  ; CHECK: vinserti128
-  %res = call <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64> %a0, <2 x i64> %a1, i8 7) ; <<4 x i64>> [#uses=1]
-  ret <4 x i64> %res
-}
-declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone
-
-
 define <2 x i64> @test_x86_avx2_maskload_q(i8* %a0, <2 x i64> %a1) {
   ; CHECK: vpmaskmovq
   %res = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]





More information about the llvm-commits mailing list