[llvm] e185b1d - [ConstProp] Constant propagation for get.active.lane.mask intrinsics
David Green via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 8 08:10:24 PST 2021
Author: David Green
Date: 2021-01-08T16:10:01Z
New Revision: e185b1dd7b34c352167823295281f1bf1df09976
URL: https://github.com/llvm/llvm-project/commit/e185b1dd7b34c352167823295281f1bf1df09976
DIFF: https://github.com/llvm/llvm-project/commit/e185b1dd7b34c352167823295281f1bf1df09976.diff
LOG: [ConstProp] Constant propagation for get.active.lane.mask intrinsics
Similar to the Arm VCTP intrinsics, if both operands of a
get.active.lane.mask call are known constants, the resulting constant
lane mask can be calculated. This can come up after loops are unrolled.
Differential Revision: https://reviews.llvm.org/D94103
Added:
llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll
Modified:
llvm/lib/Analysis/ConstantFolding.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 7b0d4bd5172b..22b9acbc03b8 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1456,6 +1456,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
case Intrinsic::masked_load:
+ case Intrinsic::get_active_lane_mask:
case Intrinsic::abs:
case Intrinsic::smax:
case Intrinsic::smin:
@@ -2927,6 +2928,25 @@ static Constant *ConstantFoldVectorCall(StringRef Name,
}
break;
}
+ case Intrinsic::get_active_lane_mask: {
+ auto *Op0 = dyn_cast<ConstantInt>(Operands[0]);
+ auto *Op1 = dyn_cast<ConstantInt>(Operands[1]);
+ if (Op0 && Op1) {
+ unsigned Lanes = FVTy->getNumElements();
+ uint64_t Base = Op0->getZExtValue();
+ uint64_t Limit = Op1->getZExtValue();
+
+ SmallVector<Constant *, 16> NCs;
+ for (unsigned i = 0; i < Lanes; i++) {
+ if (Base + i < Limit)
+ NCs.push_back(ConstantInt::getTrue(Ty));
+ else
+ NCs.push_back(ConstantInt::getFalse(Ty));
+ }
+ return ConstantVector::get(NCs);
+ }
+ break;
+ }
default:
break;
}
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll b/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll
new file mode 100644
index 000000000000..a6006bca169c
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll
@@ -0,0 +1,300 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instsimplify -S -o - %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+
+define <16 x i1> @v16i1_0() {
+; CHECK-LABEL: @v16i1_0(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <16 x i1> zeroinitializer
+;
+entry:
+ %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 0)
+ ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_1() {
+; CHECK-LABEL: @v16i1_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+ %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 1)
+ ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_8() {
+; CHECK-LABEL: @v16i1_8(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+ %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 8)
+ ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_15() {
+; CHECK-LABEL: @v16i1_15(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false>
+;
+entry:
+ %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 15)
+ ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_16() {
+; CHECK-LABEL: @v16i1_16(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+ %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 16)
+ ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_100() {
+; CHECK-LABEL: @v16i1_100(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+ %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 100)
+ ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_m1() {
+; CHECK-LABEL: @v16i1_m1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+ %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 -1)
+ ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_10_11() {
+; CHECK-LABEL: @v16i1_10_11(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+ %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 10, i32 11)
+ ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_12_11() {
+; CHECK-LABEL: @v16i1_12_11(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <16 x i1> zeroinitializer
+;
+entry:
+ %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 12, i32 11)
+ ret <16 x i1> %int
+}
+
+
+
+define <8 x i1> @v8i1_0() {
+; CHECK-LABEL: @v8i1_0(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <8 x i1> zeroinitializer
+;
+entry:
+ %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 0)
+ ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_1() {
+; CHECK-LABEL: @v8i1_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+ %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 1)
+ ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_4() {
+; CHECK-LABEL: @v8i1_4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+ %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 4)
+ ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_7() {
+; CHECK-LABEL: @v8i1_7(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false>
+;
+entry:
+ %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 7)
+ ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_8() {
+; CHECK-LABEL: @v8i1_8(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+ %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 8)
+ ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_100() {
+; CHECK-LABEL: @v8i1_100(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+ %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 100)
+ ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_m1() {
+; CHECK-LABEL: @v8i1_m1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+ %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 -1)
+ ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_10_11() {
+; CHECK-LABEL: @v8i1_10_11(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+ %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 10, i32 11)
+ ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_12_11() {
+; CHECK-LABEL: @v8i1_12_11(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <8 x i1> zeroinitializer
+;
+entry:
+ %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 12, i32 11)
+ ret <8 x i1> %int
+}
+
+
+
+define <4 x i1> @v4i1_0() {
+; CHECK-LABEL: @v4i1_0(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <4 x i1> zeroinitializer
+;
+entry:
+ %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 0)
+ ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_1() {
+; CHECK-LABEL: @v4i1_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <4 x i1> <i1 true, i1 false, i1 false, i1 false>
+;
+entry:
+ %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 1)
+ ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_3() {
+; CHECK-LABEL: @v4i1_3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <4 x i1> <i1 true, i1 true, i1 true, i1 false>
+;
+entry:
+ %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 3)
+ ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_4() {
+; CHECK-LABEL: @v4i1_4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+ %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 4)
+ ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_100() {
+; CHECK-LABEL: @v4i1_100(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+ %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 100)
+ ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_m1() {
+; CHECK-LABEL: @v4i1_m1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+ %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 -1)
+ ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_10_11() {
+; CHECK-LABEL: @v4i1_10_11(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <4 x i1> <i1 true, i1 false, i1 false, i1 false>
+;
+entry:
+ %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 10, i32 11)
+ ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_12_11() {
+; CHECK-LABEL: @v4i1_12_11(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <4 x i1> zeroinitializer
+;
+entry:
+ %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 12, i32 11)
+ ret <4 x i1> %int
+}
+
+
+
+define <4 x i1> @v4i1_nc1(i32 %x) {
+; CHECK-LABEL: @v4i1_nc1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INT:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[X:%.*]], i32 11)
+; CHECK-NEXT: ret <4 x i1> [[INT]]
+;
+entry:
+ %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %x, i32 11)
+ ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_nc2(i32 %x) {
+; CHECK-LABEL: @v4i1_nc2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INT:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 11, i32 [[X:%.*]])
+; CHECK-NEXT: ret <4 x i1> [[INT]]
+;
+entry:
+ %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 11, i32 %x)
+ ret <4 x i1> %int
+}
+
+
+
+
+
+declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
+declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
+declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32)
More information about the llvm-commits mailing list