[llvm] 1b622ff - [VP] IR expansion for inttoptr/ptrtoint
via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 9 00:57:19 PDT 2023
Author: liqin.weng
Date: 2023-09-09T15:34:46+08:00
New Revision: 1b622fff44cac855c79adad3ad78b1f4010a2b96
URL: https://github.com/llvm/llvm-project/commit/1b622fff44cac855c79adad3ad78b1f4010a2b96
DIFF: https://github.com/llvm/llvm-project/commit/1b622fff44cac855c79adad3ad78b1f4010a2b96.diff
LOG: [VP] IR expansion for inttoptr/ptrtoint
Add basic handling for VP cast ops (vp.inttoptr/vp.ptrtoint) by expanding them to non-VP cast instructions
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D159478
Added:
llvm/test/CodeGen/X86/expand-vp-cast-intrinsics.ll
Modified:
llvm/lib/CodeGen/ExpandVectorPredication.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
index 9807be0bea39eec..edddf8aea40015c 100644
--- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp
+++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -179,6 +179,10 @@ struct CachingVPExpander {
Value *expandPredicationInReduction(IRBuilder<> &Builder,
VPReductionIntrinsic &PI);
+ /// Lower this VP cast operation to a non-VP cast instruction.
+ /// Currently handles vp.inttoptr and vp.ptrtoint; the replacement is built
+ /// with \p Builder from operand 0 and the result type of \p VPI, and the
+ /// newly created value is returned.
+ Value *expandPredicationToCastIntrinsic(IRBuilder<> &Builder,
+ VPIntrinsic &VPI);
+
/// Lower this VP memory operation to a non-VP intrinsic.
Value *expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
VPIntrinsic &VPI);
@@ -436,6 +440,27 @@ CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder,
return Reduction;
}
+/// Expand a VP cast intrinsic into the corresponding unpredicated cast.
+///
+/// Only operand 0 of \p VPI (the value being cast), its result type and its
+/// name are used to build the replacement; the remaining operands of the call
+/// are not read here. The new value and \p VPI are then handed to
+/// replaceOperation().
+///
+/// \param Builder IR builder used to create the replacement cast.
+/// \param VPI     The VP cast intrinsic call being expanded.
+/// \returns The newly created cast value.
+Value *CachingVPExpander::expandPredicationToCastIntrinsic(IRBuilder<> &Builder,
+                                                           VPIntrinsic &VPI) {
+  // TODO: Add the other cast intrinsics, e.g. VP_TRUNC/VP_ZEXT.
+  // NOTE(review): expandPredication() dispatches here for every ID accepted
+  // by VPCastIntrinsic::isVPCast, which presumably includes casts other than
+  // the two handled below; those would reach the default case - confirm.
+  Value *CastOp = nullptr;
+  switch (VPI.getIntrinsicID()) {
+  default:
+    llvm_unreachable("Not a VP cast intrinsic");
+  case Intrinsic::vp_inttoptr:
+    CastOp =
+        Builder.CreateIntToPtr(VPI.getOperand(0), VPI.getType(), VPI.getName());
+    break;
+  case Intrinsic::vp_ptrtoint:
+    CastOp =
+        Builder.CreatePtrToInt(VPI.getOperand(0), VPI.getType(), VPI.getName());
+    break;
+  }
+
+  // Both cases share the same epilogue: swap the VP call for the plain cast.
+  replaceOperation(*CastOp, VPI);
+  return CastOp;
+}
+
Value *
CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
VPIntrinsic &VPI) {
@@ -598,6 +623,10 @@ Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
if (auto *VPCmp = dyn_cast<VPCmpIntrinsic>(&VPI))
return expandPredicationInComparison(Builder, *VPCmp);
+ if (VPCastIntrinsic::isVPCast(VPI.getIntrinsicID())) {
+ return expandPredicationToCastIntrinsic(Builder, VPI);
+ }
+
switch (VPI.getIntrinsicID()) {
default:
break;
diff --git a/llvm/test/CodeGen/X86/expand-vp-cast-intrinsics.ll b/llvm/test/CodeGen/X86/expand-vp-cast-intrinsics.ll
new file mode 100644
index 000000000000000..fc95e02bf1c0929
--- /dev/null
+++ b/llvm/test/CodeGen/X86/expand-vp-cast-intrinsics.ll
@@ -0,0 +1,119 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512
+
+; vp.inttoptr from <4 x i32> to <4 x ptr>. Per the expected output below, the
+; 64-bit run lines zero-extend each 32-bit lane while the i686 run emits only
+; the return; %m and %evl do not appear in any expected instruction.
+declare <4 x ptr> @llvm.vp.inttoptr.v4p0.v4i32(<4 x i32>, <4 x i1>, i32)
+define <4 x ptr> @inttoptr_v4p0_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
+; X86-LABEL: inttoptr_v4p0_v4i32:
+; X86: # %bb.0:
+; X86-NEXT: retl
+;
+; SSE-LABEL: inttoptr_v4p0_v4i32:
+; SSE: # %bb.0:
+; SSE-NEXT: movaps %xmm0, %xmm1
+; SSE-NEXT: xorps %xmm2, %xmm2
+; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: inttoptr_v4p0_v4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: inttoptr_v4p0_v4i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: inttoptr_v4p0_v4i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX512-NEXT: retq
+ %v = call <4 x ptr> @llvm.vp.inttoptr.v4p0.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x ptr> %v
+}
+
+; vp.inttoptr from <4 x i64> to <4 x ptr>. Per the expected output below, the
+; i686 run keeps the even 32-bit elements of the input (vshufps [0,2]) while
+; the 64-bit runs emit only the return.
+declare <4 x ptr> @llvm.vp.inttoptr.v4p0.v4i64(<4 x i64>, <4 x i1>, i32)
+
+define <4 x ptr> @inttoptr_v4p0_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
+; X86-LABEL: inttoptr_v4p0_v4i64:
+; X86: # %bb.0:
+; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
+; X86-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; X86-NEXT: vzeroupper
+; X86-NEXT: retl
+;
+; SSE-LABEL: inttoptr_v4p0_v4i64:
+; SSE: # %bb.0:
+; SSE-NEXT: retq
+;
+; AVX-LABEL: inttoptr_v4p0_v4i64:
+; AVX: # %bb.0:
+; AVX-NEXT: retq
+ %v = call <4 x ptr> @llvm.vp.inttoptr.v4p0.v4i64(<4 x i64> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x ptr> %v
+}
+
+; vp.ptrtoint from <4 x ptr> to <4 x i32>. Per the expected output below, the
+; 64-bit runs narrow each lane to 32 bits (shuffle of even elements, or
+; vpmovqd with AVX512VL) while the i686 run emits only the return.
+declare <4 x i32> @llvm.vp.ptrtoint.v4i32.v4p0(<4 x ptr>, <4 x i1>, i32)
+
+define <4 x i32> @ptrtoint_v4i32_v4p0(<4 x ptr> %va, <4 x i1> %m, i32 zeroext %evl) {
+; X86-LABEL: ptrtoint_v4i32_v4p0:
+; X86: # %bb.0:
+; X86-NEXT: retl
+;
+; SSE-LABEL: ptrtoint_v4i32_v4p0:
+; SSE: # %bb.0:
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: ptrtoint_v4i32_v4p0:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: ptrtoint_v4i32_v4p0:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: ptrtoint_v4i32_v4p0:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovqd %ymm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+ %v = call <4 x i32> @llvm.vp.ptrtoint.v4i32.v4p0(<4 x ptr> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+; vp.ptrtoint from <4 x ptr> to <4 x i64>. Per the expected output below, the
+; i686 run zero-extends each 32-bit lane to 64 bits while the 64-bit runs emit
+; only the return.
+declare <4 x i64> @llvm.vp.ptrtoint.v4i64.v4p0(<4 x ptr>, <4 x i1>, i32)
+
+define <4 x i64> @ptrtoint_v4i64_v4p0(<4 x ptr> %va, <4 x i1> %m, i32 zeroext %evl) {
+; X86-LABEL: ptrtoint_v4i64_v4p0:
+; X86: # %bb.0:
+; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X86-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X86-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-NEXT: retl
+;
+; SSE-LABEL: ptrtoint_v4i64_v4p0:
+; SSE: # %bb.0:
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ptrtoint_v4i64_v4p0:
+; AVX: # %bb.0:
+; AVX-NEXT: retq
+ %v = call <4 x i64> @llvm.vp.ptrtoint.v4i64.v4p0(<4 x ptr> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
More information about the llvm-commits
mailing list