[llvm] r272907 - [x86] autoupgrade and remove SSE2/SSE41 integer min/max intrinsics
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 16 08:48:30 PDT 2016
Author: spatel
Date: Thu Jun 16 10:48:30 2016
New Revision: 272907
URL: http://llvm.org/viewvc/llvm-project?rev=272907&view=rev
Log:
[x86] autoupgrade and remove SSE2/SSE41 integer min/max intrinsics
Follow-up to:
http://reviews.llvm.org/rL272806
http://reviews.llvm.org/rL272807
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsX86.td
llvm/trunk/lib/IR/AutoUpgrade.cpp
llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=272907&r1=272906&r2=272907&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Thu Jun 16 10:48:30 2016
@@ -406,18 +406,6 @@ let TargetPrefix = "x86" in { // All in
def int_x86_sse2_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_pmaxu_b :
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
- llvm_v16i8_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_pmaxs_w :
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_pminu_b :
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
- llvm_v16i8_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_pmins_w :
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem, Commutative]>;
def int_x86_sse2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty,
llvm_v16i8_ty], [IntrNoMem, Commutative]>;
@@ -735,34 +723,6 @@ let TargetPrefix = "x86" in { // All in
[IntrNoMem]>;
}
-// Vector compare, min, max
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse41_pmaxsb :
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
- [IntrNoMem, Commutative]>;
- def int_x86_sse41_pmaxsd :
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem, Commutative]>;
- def int_x86_sse41_pmaxud :
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem, Commutative]>;
- def int_x86_sse41_pmaxuw :
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem, Commutative]>;
- def int_x86_sse41_pminsb :
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
- [IntrNoMem, Commutative]>;
- def int_x86_sse41_pminsd :
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem, Commutative]>;
- def int_x86_sse41_pminud :
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem, Commutative]>;
- def int_x86_sse41_pminuw :
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem, Commutative]>;
-}
-
// Advanced Encryption Standard (AES) Instructions
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_aesni_aesimc : GCCBuiltin<"__builtin_ia32_aesimc128">,
Modified: llvm/trunk/lib/IR/AutoUpgrade.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUpgrade.cpp?rev=272907&r1=272906&r2=272907&view=diff
==============================================================================
--- llvm/trunk/lib/IR/AutoUpgrade.cpp (original)
+++ llvm/trunk/lib/IR/AutoUpgrade.cpp Thu Jun 16 10:48:30 2016
@@ -174,6 +174,18 @@ static bool UpgradeIntrinsicFunction1(Fu
Name.startswith("x86.sse2.pcmpgt.") ||
Name.startswith("x86.avx2.pcmpeq.") ||
Name.startswith("x86.avx2.pcmpgt.") ||
+ Name == "x86.sse41.pmaxsb" ||
+ Name == "x86.sse2.pmaxs.w" ||
+ Name == "x86.sse41.pmaxsd" ||
+ Name == "x86.sse2.pmaxu.b" ||
+ Name == "x86.sse41.pmaxuw" ||
+ Name == "x86.sse41.pmaxud" ||
+ Name == "x86.sse41.pminsb" ||
+ Name == "x86.sse2.pmins.w" ||
+ Name == "x86.sse41.pminsd" ||
+ Name == "x86.sse2.pminu.b" ||
+ Name == "x86.sse41.pminuw" ||
+ Name == "x86.sse41.pminud" ||
Name.startswith("x86.avx2.vbroadcast") ||
Name.startswith("x86.avx2.pbroadcast") ||
Name.startswith("x86.avx.vpermil.") ||
@@ -518,6 +530,14 @@ static Value *UpgradeMaskedLoad(IRBuilde
return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
}
+static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
+ ICmpInst::Predicate Pred) {
+ Value *Op0 = CI.getArgOperand(0);
+ Value *Op1 = CI.getArgOperand(1);
+ Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
+ return Builder.CreateSelect(Cmp, Op0, Op1);
+}
+
/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
@@ -544,6 +564,22 @@ void llvm::UpgradeIntrinsicCall(CallInst
Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
"pcmpgt");
Rep = Builder.CreateSExt(Rep, CI->getType(), "");
+ } else if (Name == "llvm.x86.sse41.pmaxsb" ||
+ Name == "llvm.x86.sse2.pmaxs.w" ||
+ Name == "llvm.x86.sse41.pmaxsd") {
+ Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
+ } else if (Name == "llvm.x86.sse2.pmaxu.b" ||
+ Name == "llvm.x86.sse41.pmaxuw" ||
+ Name == "llvm.x86.sse41.pmaxud") {
+ Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
+ } else if (Name == "llvm.x86.sse41.pminsb" ||
+ Name == "llvm.x86.sse2.pmins.w" ||
+ Name == "llvm.x86.sse41.pminsd") {
+ Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
+ } else if (Name == "llvm.x86.sse2.pminu.b" ||
+ Name == "llvm.x86.sse41.pminuw" ||
+ Name == "llvm.x86.sse41.pminud") {
+ Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
} else if (Name == "llvm.x86.sse2.cvtdq2pd" ||
Name == "llvm.x86.sse2.cvtps2pd" ||
Name == "llvm.x86.avx.cvtdq2.pd.256" ||
Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=272907&r1=272906&r2=272907&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Thu Jun 16 10:48:30 2016
@@ -2104,10 +2104,6 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(sse2_paddus_w, INTR_TYPE_2OP, X86ISD::ADDUS, 0),
X86_INTRINSIC_DATA(sse2_pavg_b, INTR_TYPE_2OP, X86ISD::AVG, 0),
X86_INTRINSIC_DATA(sse2_pavg_w, INTR_TYPE_2OP, X86ISD::AVG, 0),
- X86_INTRINSIC_DATA(sse2_pmaxs_w, INTR_TYPE_2OP, ISD::SMAX, 0),
- X86_INTRINSIC_DATA(sse2_pmaxu_b, INTR_TYPE_2OP, ISD::UMAX, 0),
- X86_INTRINSIC_DATA(sse2_pmins_w, INTR_TYPE_2OP, ISD::SMIN, 0),
- X86_INTRINSIC_DATA(sse2_pminu_b, INTR_TYPE_2OP, ISD::UMIN, 0),
X86_INTRINSIC_DATA(sse2_pmovmskb_128, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
X86_INTRINSIC_DATA(sse2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0),
@@ -2146,14 +2142,6 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(sse3_hsub_ps, INTR_TYPE_2OP, X86ISD::FHSUB, 0),
X86_INTRINSIC_DATA(sse41_insertps, INTR_TYPE_3OP, X86ISD::INSERTPS, 0),
X86_INTRINSIC_DATA(sse41_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(sse41_pmaxsb, INTR_TYPE_2OP, ISD::SMAX, 0),
- X86_INTRINSIC_DATA(sse41_pmaxsd, INTR_TYPE_2OP, ISD::SMAX, 0),
- X86_INTRINSIC_DATA(sse41_pmaxud, INTR_TYPE_2OP, ISD::UMAX, 0),
- X86_INTRINSIC_DATA(sse41_pmaxuw, INTR_TYPE_2OP, ISD::UMAX, 0),
- X86_INTRINSIC_DATA(sse41_pminsb, INTR_TYPE_2OP, ISD::SMIN, 0),
- X86_INTRINSIC_DATA(sse41_pminsd, INTR_TYPE_2OP, ISD::SMIN, 0),
- X86_INTRINSIC_DATA(sse41_pminud, INTR_TYPE_2OP, ISD::UMIN, 0),
- X86_INTRINSIC_DATA(sse41_pminuw, INTR_TYPE_2OP, ISD::UMIN, 0),
X86_INTRINSIC_DATA(sse41_pmuldq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0),
X86_INTRINSIC_DATA(sse4a_extrqi, INTR_TYPE_3OP, X86ISD::EXTRQI, 0),
X86_INTRINSIC_DATA(sse4a_insertqi, INTR_TYPE_4OP, X86ISD::INSERTQI, 0),
Modified: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll?rev=272907&r1=272906&r2=272907&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll Thu Jun 16 10:48:30 2016
@@ -144,8 +144,8 @@ define <4 x i32> @test_x86_sse2_pshuf_d(
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; CHECK-NEXT: retl
entry:
- %res = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) nounwind readnone
- ret <4 x i32> %res
+ %res = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) nounwind readnone
+ ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8) nounwind readnone
@@ -155,8 +155,8 @@ define <8 x i16> @test_x86_sse2_pshufl_w
; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; CHECK-NEXT: retl
entry:
- %res = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27) nounwind readnone
- ret <8 x i16> %res
+ %res = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27) nounwind readnone
+ ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8) nounwind readnone
@@ -166,7 +166,52 @@ define <8 x i16> @test_x86_sse2_pshufh_w
; CHECK-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; CHECK-NEXT: retl
entry:
- %res = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27) nounwind readnone
- ret <8 x i16> %res
+ %res = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27) nounwind readnone
+ ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8) nounwind readnone
+
+define <16 x i8> @max_epu8(<16 x i8> %a0, <16 x i8> %a1) {
+; CHECK-LABEL: max_epu8:
+; CHECK: ## BB#0:
+; CHECK-NEXT: pmaxub %xmm1, %xmm0
+; CHECK-NEXT: retl
+;
+ %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <16 x i8> @min_epu8(<16 x i8> %a0, <16 x i8> %a1) {
+; CHECK-LABEL: min_epu8:
+; CHECK: ## BB#0:
+; CHECK-NEXT: pminub %xmm1, %xmm0
+; CHECK-NEXT: retl
+;
+ %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <8 x i16> @max_epi16(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: max_epi16:
+; CHECK: ## BB#0:
+; CHECK-NEXT: pmaxsw %xmm1, %xmm0
+; CHECK-NEXT: retl
+;
+ %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @min_epi16(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: min_epi16:
+; CHECK: ## BB#0:
+; CHECK-NEXT: pminsw %xmm1, %xmm0
+; CHECK-NEXT: retl
+;
+ %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
+
Modified: llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll?rev=272907&r1=272906&r2=272907&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll Thu Jun 16 10:48:30 2016
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse4.1 | FileCheck %s
; This test works just like the non-upgrade one except that it only checks
@@ -211,3 +211,92 @@ define <2 x i64> @test_x86_sse41_pmovzxw
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
+
+define <16 x i8> @max_epi8(<16 x i8> %a0, <16 x i8> %a1) {
+; CHECK-LABEL: max_epi8:
+; CHECK: ## BB#0:
+; CHECK-NEXT: pmaxsb %xmm1, %xmm0
+; CHECK-NEXT: retl
+;
+ %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <16 x i8> @min_epi8(<16 x i8> %a0, <16 x i8> %a1) {
+; CHECK-LABEL: min_epi8:
+; CHECK: ## BB#0:
+; CHECK-NEXT: pminsb %xmm1, %xmm0
+; CHECK-NEXT: retl
+;
+ %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <8 x i16> @max_epu16(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: max_epu16:
+; CHECK: ## BB#0:
+; CHECK-NEXT: pmaxuw %xmm1, %xmm0
+; CHECK-NEXT: retl
+;
+ %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @min_epu16(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: min_epu16:
+; CHECK: ## BB#0:
+; CHECK-NEXT: pminuw %xmm1, %xmm0
+; CHECK-NEXT: retl
+;
+ %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @max_epi32(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: max_epi32:
+; CHECK: ## BB#0:
+; CHECK-NEXT: pmaxsd %xmm1, %xmm0
+; CHECK-NEXT: retl
+;
+ %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @min_epi32(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: min_epi32:
+; CHECK: ## BB#0:
+; CHECK-NEXT: pminsd %xmm1, %xmm0
+; CHECK-NEXT: retl
+;
+ %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @max_epu32(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: max_epu32:
+; CHECK: ## BB#0:
+; CHECK-NEXT: pmaxud %xmm1, %xmm0
+; CHECK-NEXT: retl
+;
+ %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @min_epu32(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: min_epu32:
+; CHECK: ## BB#0:
+; CHECK-NEXT: pminud %xmm1, %xmm0
+; CHECK-NEXT: retl
+;
+ %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
+
More information about the llvm-commits
mailing list