[llvm] r232045 - [X86, AVX] replace vextractf128 intrinsics with generic shuffles
Sanjay Patel
spatel at rotateright.com
Thu Mar 12 08:15:19 PDT 2015
Author: spatel
Date: Thu Mar 12 10:15:19 2015
New Revision: 232045
URL: http://llvm.org/viewvc/llvm-project?rev=232045&view=rev
Log:
[X86, AVX] replace vextractf128 intrinsics with generic shuffles
Now that we've replaced the vinsertf128 intrinsics,
do the same for their extract twins.
This is very much like D8086 (checked in at r231794):
We want to replace as much custom x86 shuffling via intrinsics
as possible because pushing the code down the generic shuffle
optimization path allows for better codegen and less complexity
in LLVM.
This is also the LLVM sibling to the cfe D8275 patch.
Differential Revision: http://reviews.llvm.org/D8276
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsX86.td
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/trunk/lib/IR/AutoUpgrade.cpp
llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=232045&r1=232044&r2=232045&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Thu Mar 12 10:15:19 2015
@@ -1172,19 +1172,6 @@ let TargetPrefix = "x86" in { // All in
llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
}
-// Vector extract and insert
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_vextractf128_pd_256 :
- GCCBuiltin<"__builtin_ia32_vextractf128_pd256">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx_vextractf128_ps_256 :
- GCCBuiltin<"__builtin_ia32_vextractf128_ps256">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx_vextractf128_si_256 :
- GCCBuiltin<"__builtin_ia32_vextractf128_si256">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
-}
-
// Vector convert
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_cvtdq2_pd_256 : GCCBuiltin<"__builtin_ia32_cvtdq2pd256">,
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=232045&r1=232044&r2=232045&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Thu Mar 12 10:15:19 2015
@@ -4978,9 +4978,6 @@ SelectionDAGBuilder::visitIntrinsicCall(
setValue(&I, Res);
return nullptr;
}
- case Intrinsic::x86_avx_vextractf128_pd_256:
- case Intrinsic::x86_avx_vextractf128_ps_256:
- case Intrinsic::x86_avx_vextractf128_si_256:
case Intrinsic::x86_avx2_vextracti128: {
EVT DestVT = TLI.getValueType(I.getType());
uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) *
Modified: llvm/trunk/lib/IR/AutoUpgrade.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUpgrade.cpp?rev=232045&r1=232044&r2=232045&view=diff
==============================================================================
--- llvm/trunk/lib/IR/AutoUpgrade.cpp (original)
+++ llvm/trunk/lib/IR/AutoUpgrade.cpp Thu Mar 12 10:15:19 2015
@@ -161,6 +161,9 @@ static bool UpgradeIntrinsicFunction1(Fu
Name == "x86.avx.vinsertf128.pd.256" ||
Name == "x86.avx.vinsertf128.ps.256" ||
Name == "x86.avx.vinsertf128.si.256" ||
+ Name == "x86.avx.vextractf128.pd.256" ||
+ Name == "x86.avx.vextractf128.ps.256" ||
+ Name == "x86.avx.vextractf128.si.256" ||
Name == "x86.avx.movnt.dq.256" ||
Name == "x86.avx.movnt.pd.256" ||
Name == "x86.avx.movnt.ps.256" ||
@@ -676,6 +679,26 @@ void llvm::UpgradeIntrinsicCall(CallInst
Idxs2.push_back(Builder.getInt32(Idx));
}
Rep = Builder.CreateShuffleVector(Op0, Rep, ConstantVector::get(Idxs2));
+ } else if (Name == "llvm.x86.avx.vextractf128.pd.256" ||
+ Name == "llvm.x86.avx.vextractf128.ps.256" ||
+ Name == "llvm.x86.avx.vextractf128.si.256") {
+ Value *Op0 = CI->getArgOperand(0);
+ unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+ VectorType *VecTy = cast<VectorType>(CI->getType());
+ unsigned NumElts = VecTy->getNumElements();
+
+ // Mask off the high bits of the immediate value; hardware ignores those.
+ Imm = Imm & 1;
+
+ // Get indexes for either the high half or low half of the input vector.
+ SmallVector<Constant*, 4> Idxs(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ unsigned Idx = Imm ? (i + NumElts) : i;
+ Idxs[i] = Builder.getInt32(Idx);
+ }
+
+ Value *UndefV = UndefValue::get(Op0->getType());
+ Rep = Builder.CreateShuffleVector(Op0, UndefV, ConstantVector::get(Idxs));
} else {
bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
if (Name == "llvm.x86.avx.vpermil.pd.256")
Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll?rev=232045&r1=232044&r2=232045&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll Thu Mar 12 10:15:19 2015
@@ -36,6 +36,43 @@ define <8 x i32> @test_x86_avx_vinsertf1
}
declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone
+; We don't check any vextractf128 variant with immediate 0 because that's just a move.
+
+define <2 x double> @test_x86_avx_vextractf128_pd_256_1(<4 x double> %a0) {
+; CHECK-LABEL: test_x86_avx_vextractf128_pd_256_1:
+; CHECK: vextractf128 $1, %ymm0, %xmm0
+ %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 1)
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone
+
+define <4 x float> @test_x86_avx_vextractf128_ps_256_1(<8 x float> %a0) {
+; CHECK-LABEL: test_x86_avx_vextractf128_ps_256_1:
+; CHECK: vextractf128 $1, %ymm0, %xmm0
+ %res = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a0, i8 1)
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone
+
+define <4 x i32> @test_x86_avx_vextractf128_si_256_1(<8 x i32> %a0) {
+; CHECK-LABEL: test_x86_avx_vextractf128_si_256_1:
+; CHECK: vextractf128 $1, %ymm0, %xmm0
+ %res = call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %a0, i8 1)
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone
+
+; Verify that high bits of the immediate are masked off. This should be the equivalent
+; of a vextractf128 $0 which should be optimized away, so just check that it's
+; not a vextractf128 of any kind.
+define <2 x double> @test_x86_avx_extractf128_pd_256_2(<4 x double> %a0) {
+; CHECK-LABEL: test_x86_avx_extractf128_pd_256_2:
+; CHECK-NOT: vextractf128
+ %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 2)
+ ret <2 x double> %res
+}
+
+
define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_blend_pd_256:
; CHECK: vblendpd
Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll?rev=232045&r1=232044&r2=232045&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll Thu Mar 12 10:15:19 2015
@@ -2163,30 +2163,6 @@ define <8 x float> @test_x86_avx_vbroadc
declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly
-define <2 x double> @test_x86_avx_vextractf128_pd_256(<4 x double> %a0) {
- ; CHECK: vextractf128
- %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 7) ; <<2 x double>> [#uses=1]
- ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone
-
-
-define <4 x float> @test_x86_avx_vextractf128_ps_256(<8 x float> %a0) {
- ; CHECK: vextractf128
- %res = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
- ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone
-
-
-define <4 x i32> @test_x86_avx_vextractf128_si_256(<8 x i32> %a0) {
- ; CHECK: vextractf128
- %res = call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %a0, i8 7) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone
-
-
define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK: vperm2f128
%res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
More information about the llvm-commits
mailing list