[llvm] 28e74e6 - [VP] IR expansion for abs/smax/smin/umax/umin
Author: liqin.weng
Date: 2023-09-11T21:14:37+08:00
New Revision: 28e74e61801a0c01b57d08b100644a1cac4b3c7c
URL: https://github.com/llvm/llvm-project/commit/28e74e61801a0c01b57d08b100644a1cac4b3c7c
DIFF: https://github.com/llvm/llvm-project/commit/28e74e61801a0c01b57d08b100644a1cac4b3c7c.diff
LOG: [VP] IR expansion for abs/smax/smin/umax/umin
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D159495
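
For context, a minimal sketch of the expansion this patch enables (hypothetical IR, assuming an all-ones mask and an ignorable vector length parameter, so the predicated call may legally be replaced by its unpredicated counterpart):

  ; Before ExpandVectorPredication:
  define <4 x i32> @smax_sketch(<4 x i32> %a, <4 x i32> %b, i32 %evl) {
    %r = call <4 x i32> @llvm.vp.smax.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 %evl)
    ret <4 x i32> %r
  }

  ; After ExpandVectorPredication (mask and EVL dropped):
  define <4 x i32> @smax_sketch(<4 x i32> %a, <4 x i32> %b, i32 %evl) {
    %r = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> %b)
    ret <4 x i32> %r
  }

  declare <4 x i32> @llvm.vp.smax.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
  declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)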
Added:
Modified:
llvm/lib/CodeGen/ExpandVectorPredication.cpp
llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
index edddf8aea40015c..2e12f7b78362b70 100644
--- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp
+++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -171,6 +171,10 @@ struct CachingVPExpander {
Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder,
VPIntrinsic &PI);
+ /// Lower this VP int call to an unpredicated int call.
+ Value *expandPredicationToIntCall(IRBuilder<> &Builder, VPIntrinsic &PI,
+ unsigned UnpredicatedIntrinsicID);
+
/// Lower this VP fp call to an unpredicated fp call.
Value *expandPredicationToFPCall(IRBuilder<> &Builder, VPIntrinsic &PI,
unsigned UnpredicatedIntrinsicID);
@@ -279,6 +283,26 @@ CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,
return NewBinOp;
}
+Value *CachingVPExpander::expandPredicationToIntCall(
+ IRBuilder<> &Builder, VPIntrinsic &VPI, unsigned UnpredicatedIntrinsicID) {
+ switch (UnpredicatedIntrinsicID) {
+ case Intrinsic::abs:
+ case Intrinsic::smax:
+ case Intrinsic::smin:
+ case Intrinsic::umax:
+ case Intrinsic::umin: {
+ Value *Op0 = VPI.getOperand(0);
+ Value *Op1 = VPI.getOperand(1);
+ Function *Fn = Intrinsic::getDeclaration(
+ VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
+ Value *NewOp = Builder.CreateCall(Fn, {Op0, Op1}, VPI.getName());
+ replaceOperation(*NewOp, VPI);
+ return NewOp;
+ }
+ }
+ return nullptr;
+}
+
Value *CachingVPExpander::expandPredicationToFPCall(
IRBuilder<> &Builder, VPIntrinsic &VPI, unsigned UnpredicatedIntrinsicID) {
assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
@@ -635,6 +659,16 @@ Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
replaceOperation(*NewNegOp, VPI);
return NewNegOp;
}
+ case Intrinsic::vp_abs:
+ return expandPredicationToIntCall(Builder, VPI, Intrinsic::abs);
+ case Intrinsic::vp_smax:
+ return expandPredicationToIntCall(Builder, VPI, Intrinsic::smax);
+ case Intrinsic::vp_smin:
+ return expandPredicationToIntCall(Builder, VPI, Intrinsic::smin);
+ case Intrinsic::vp_umax:
+ return expandPredicationToIntCall(Builder, VPI, Intrinsic::umax);
+ case Intrinsic::vp_umin:
+ return expandPredicationToIntCall(Builder, VPI, Intrinsic::umin);
case Intrinsic::vp_fabs:
return expandPredicationToFPCall(Builder, VPI, Intrinsic::fabs);
case Intrinsic::vp_sqrt:
diff --git a/llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll b/llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll
index db52dd6b47b46cb..868387a59d7016e 100644
--- a/llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll
@@ -842,42 +842,117 @@ define void @vp_xor_v4i32(<4 x i32> %a0, <4 x i32> %a1, ptr %out, i32 %vp) nounw
}
declare <4 x i32> @llvm.vp.xor.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
-; TODO: llvm.vp.abs.v4i32
-;define void @vp_abs_v4i32(<4 x i32> %a0, <4 x i32> %a1, ptr %out, i32 %vp) nounwind {
-; %res = call <4 x i32> @llvm.vp.abs.v4i32(<4 x i32> %a0, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp)
-; store <4 x i32> %res, ptr %out
-; ret void
-;}
-;declare <4 x i32> @llvm.vp.abs.v4i32(<4 x i32>, <4 x i1>, i32)
+define void @vp_abs_v4i32(<4 x i32> %a0, ptr %out, i32 %vp) nounwind {
+; SSE-LABEL: vp_abs_v4i32:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrad $31, %xmm1
+; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: psubd %xmm1, %xmm0
+; SSE-NEXT: movdqa %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: vp_abs_v4i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpabsd %xmm0, %xmm0
+; AVX-NEXT: vmovdqa %xmm0, (%rdi)
+; AVX-NEXT: retq
+ %res = call <4 x i32> @llvm.vp.abs.v4i32(<4 x i32> %a0, i1 false, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp)
+ store <4 x i32> %res, ptr %out
+ ret void
+}
+declare <4 x i32> @llvm.vp.abs.v4i32(<4 x i32>, i1 immarg, <4 x i1>, i32)
-; TODO: llvm.vp.smax.v4i32
-;define void @vp_smax_v4i32(<4 x i32> %a0, <4 x i32> %a1, ptr %out, i32 %vp) nounwind {
-; %res = call <4 x i32> @llvm.vp.smax.v4i32(<4 x i32> %a0, <4 x i32> %a1, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp)
-; store <4 x i32> %res, ptr %out
-; ret void
-;}
-;declare <4 x i32> @llvm.vp.smax.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
+define void @vp_smax_v4i32(<4 x i32> %a0, <4 x i32> %a1, ptr %out, i32 %vp) nounwind {
+; SSE-LABEL: vp_smax_v4i32:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm2
+; SSE-NEXT: pcmpgtd %xmm1, %xmm2
+; SSE-NEXT: pand %xmm2, %xmm0
+; SSE-NEXT: pandn %xmm1, %xmm2
+; SSE-NEXT: por %xmm0, %xmm2
+; SSE-NEXT: movdqa %xmm2, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: vp_smax_v4i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmovdqa %xmm0, (%rdi)
+; AVX-NEXT: retq
+ %res = call <4 x i32> @llvm.vp.smax.v4i32(<4 x i32> %a0, <4 x i32> %a1, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp)
+ store <4 x i32> %res, ptr %out
+ ret void
+}
+declare <4 x i32> @llvm.vp.smax.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
-; TODO: llvm.vp.smin.v4i32
-;define void @vp_smin_v4i32(<4 x i32> %a0, <4 x i32> %a1, ptr %out, i32 %vp) nounwind {
-; %res = call <4 x i32> @llvm.vp.smin.v4i32(<4 x i32> %a0, <4 x i32> %a1, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp)
-; store <4 x i32> %res, ptr %out
-; ret void
-;}
-;declare <4 x i32> @llvm.vp.smin.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
+define void @vp_smin_v4i32(<4 x i32> %a0, <4 x i32> %a1, ptr %out, i32 %vp) nounwind {
+; SSE-LABEL: vp_smin_v4i32:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa %xmm1, %xmm2
+; SSE-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE-NEXT: pand %xmm2, %xmm0
+; SSE-NEXT: pandn %xmm1, %xmm2
+; SSE-NEXT: por %xmm0, %xmm2
+; SSE-NEXT: movdqa %xmm2, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: vp_smin_v4i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmovdqa %xmm0, (%rdi)
+; AVX-NEXT: retq
+ %res = call <4 x i32> @llvm.vp.smin.v4i32(<4 x i32> %a0, <4 x i32> %a1, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp)
+ store <4 x i32> %res, ptr %out
+ ret void
+}
+declare <4 x i32> @llvm.vp.smin.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
-; TODO: llvm.vp.umax.v4i32
-;define void @vp_umax_v4i32(<4 x i32> %a0, <4 x i32> %a1, ptr %out, i32 %vp) nounwind {
-; %res = call <4 x i32> @llvm.vp.umax.v4i32(<4 x i32> %a0, <4 x i32> %a1, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp)
-; store <4 x i32> %res, ptr %out
-; ret void
-;}
-;declare <4 x i32> @llvm.vp.umax.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
+define void @vp_umax_v4i32(<4 x i32> %a0, <4 x i32> %a1, ptr %out, i32 %vp) nounwind {
+; SSE-LABEL: vp_umax_v4i32:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT: movdqa %xmm1, %xmm3
+; SSE-NEXT: pxor %xmm2, %xmm3
+; SSE-NEXT: pxor %xmm0, %xmm2
+; SSE-NEXT: pcmpgtd %xmm3, %xmm2
+; SSE-NEXT: pand %xmm2, %xmm0
+; SSE-NEXT: pandn %xmm1, %xmm2
+; SSE-NEXT: por %xmm0, %xmm2
+; SSE-NEXT: movdqa %xmm2, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: vp_umax_v4i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmovdqa %xmm0, (%rdi)
+; AVX-NEXT: retq
+ %res = call <4 x i32> @llvm.vp.umax.v4i32(<4 x i32> %a0, <4 x i32> %a1, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp)
+ store <4 x i32> %res, ptr %out
+ ret void
+}
+declare <4 x i32> @llvm.vp.umax.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
-; TODO: llvm.vp.umin.v4i32
-;define void @vp_umin_v4i32(<4 x i32> %a0, <4 x i32> %a1, ptr %out, i32 %vp) nounwind {
-; %res = call <4 x i32> @llvm.vp.umin.v4i32(<4 x i32> %a0, <4 x i32> %a1, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp)
-; store <4 x i32> %res, ptr %out
-; ret void
-;}
-;declare <4 x i32> @llvm.vp.umin.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
+define void @vp_umin_v4i32(<4 x i32> %a0, <4 x i32> %a1, ptr %out, i32 %vp) nounwind {
+; SSE-LABEL: vp_umin_v4i32:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT: movdqa %xmm0, %xmm3
+; SSE-NEXT: pxor %xmm2, %xmm3
+; SSE-NEXT: pxor %xmm1, %xmm2
+; SSE-NEXT: pcmpgtd %xmm3, %xmm2
+; SSE-NEXT: pand %xmm2, %xmm0
+; SSE-NEXT: pandn %xmm1, %xmm2
+; SSE-NEXT: por %xmm0, %xmm2
+; SSE-NEXT: movdqa %xmm2, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: vp_umin_v4i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmovdqa %xmm0, (%rdi)
+; AVX-NEXT: retq
+ %res = call <4 x i32> @llvm.vp.umin.v4i32(<4 x i32> %a0, <4 x i32> %a1, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp)
+ store <4 x i32> %res, ptr %out
+ ret void
+}
+declare <4 x i32> @llvm.vp.umin.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
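
Note on the abs case: unlike the binary min/max intrinsics, operand 1 of llvm.vp.abs is the i1 immarg INT_MIN-is-poison flag rather than a second vector, so expandPredicationToIntCall forwards operands 0 and 1 unchanged to llvm.abs, whose (value, i1) signature matches. A minimal sketch (hypothetical IR, same all-ones-mask assumption as above):

  %r = call <4 x i32> @llvm.vp.abs.v4i32(<4 x i32> %a, i1 false, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 %evl)
  ; becomes:
  %r = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a, i1 false)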