[llvm-branch-commits] [llvm] e185b1d - [ConstProp] Constant propagation for get.active.lane.mask intrinsics
David Green via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Jan 8 08:15:01 PST 2021
Author: David Green
Date: 2021-01-08T16:10:01Z
New Revision: e185b1dd7b34c352167823295281f1bf1df09976
URL: https://github.com/llvm/llvm-project/commit/e185b1dd7b34c352167823295281f1bf1df09976
DIFF: https://github.com/llvm/llvm-project/commit/e185b1dd7b34c352167823295281f1bf1df09976.diff
LOG: [ConstProp] Constant propagation for get.active.lane.mask intrinsics
Similar to the Arm VCTP intrinsics, if the operands of an
active.lane.mask are both known, the constant lane mask can be
calculated. This can come up after unrolling the loops.
Differential Revision: https://reviews.llvm.org/D94103
Added:
llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll
Modified:
llvm/lib/Analysis/ConstantFolding.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 7b0d4bd5172b..22b9acbc03b8 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1456,6 +1456,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
case Intrinsic::masked_load:
+ case Intrinsic::get_active_lane_mask:
case Intrinsic::abs:
case Intrinsic::smax:
case Intrinsic::smin:
@@ -2927,6 +2928,25 @@ static Constant *ConstantFoldVectorCall(StringRef Name,
}
break;
}
+ case Intrinsic::get_active_lane_mask: {
+ auto *Op0 = dyn_cast<ConstantInt>(Operands[0]);
+ auto *Op1 = dyn_cast<ConstantInt>(Operands[1]);
+ if (Op0 && Op1) {
+ unsigned Lanes = FVTy->getNumElements();
+ uint64_t Base = Op0->getZExtValue();
+ uint64_t Limit = Op1->getZExtValue();
+
+ SmallVector<Constant *, 16> NCs;
+ for (unsigned i = 0; i < Lanes; i++) {
+ if (Base + i < Limit)
+ NCs.push_back(ConstantInt::getTrue(Ty));
+ else
+ NCs.push_back(ConstantInt::getFalse(Ty));
+ }
+ return ConstantVector::get(NCs);
+ }
+ break;
+ }
default:
break;
}
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll b/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll
new file mode 100644
index 000000000000..a6006bca169c
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll
@@ -0,0 +1,300 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instsimplify -S -o - %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+
+define <16 x i1> @v16i1_0() {
+; CHECK-LABEL: @v16i1_0(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <16 x i1> zeroinitializer
+;
+entry:
+ %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 0)
+ ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_1() {
+; CHECK-LABEL: @v16i1_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+ %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 1)
+ ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_8() {
+; CHECK-LABEL: @v16i1_8(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+ %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 8)
+ ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_15() {
+; CHECK-LABEL: @v16i1_15(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false>
+;
+entry:
+ %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 15)
+ ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_16() {
+; CHECK-LABEL: @v16i1_16(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+ %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 16)
+ ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_100() {
+; CHECK-LABEL: @v16i1_100(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+ %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 100)
+ ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_m1() {
+; CHECK-LABEL: @v16i1_m1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+ %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 -1)
+ ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_10_11() {
+; CHECK-LABEL: @v16i1_10_11(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+ %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 10, i32 11)
+ ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_12_11() {
+; CHECK-LABEL: @v16i1_12_11(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <16 x i1> zeroinitializer
+;
+entry:
+ %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 12, i32 11)
+ ret <16 x i1> %int
+}
+
+
+
+define <8 x i1> @v8i1_0() {
+; CHECK-LABEL: @v8i1_0(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <8 x i1> zeroinitializer
+;
+entry:
+ %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 0)
+ ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_1() {
+; CHECK-LABEL: @v8i1_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+ %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 1)
+ ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_4() {
+; CHECK-LABEL: @v8i1_4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+ %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 4)
+ ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_7() {
+; CHECK-LABEL: @v8i1_7(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false>
+;
+entry:
+ %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 7)
+ ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_8() {
+; CHECK-LABEL: @v8i1_8(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+ %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 8)
+ ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_100() {
+; CHECK-LABEL: @v8i1_100(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+ %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 100)
+ ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_m1() {
+; CHECK-LABEL: @v8i1_m1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+ %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 -1)
+ ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_10_11() {
+; CHECK-LABEL: @v8i1_10_11(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+ %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 10, i32 11)
+ ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_12_11() {
+; CHECK-LABEL: @v8i1_12_11(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <8 x i1> zeroinitializer
+;
+entry:
+ %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 12, i32 11)
+ ret <8 x i1> %int
+}
+
+
+
+define <4 x i1> @v4i1_0() {
+; CHECK-LABEL: @v4i1_0(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <4 x i1> zeroinitializer
+;
+entry:
+ %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 0)
+ ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_1() {
+; CHECK-LABEL: @v4i1_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <4 x i1> <i1 true, i1 false, i1 false, i1 false>
+;
+entry:
+ %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 1)
+ ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_3() {
+; CHECK-LABEL: @v4i1_3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <4 x i1> <i1 true, i1 true, i1 true, i1 false>
+;
+entry:
+ %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 3)
+ ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_4() {
+; CHECK-LABEL: @v4i1_4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+ %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 4)
+ ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_100() {
+; CHECK-LABEL: @v4i1_100(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+ %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 100)
+ ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_m1() {
+; CHECK-LABEL: @v4i1_m1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+ %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 -1)
+ ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_10_11() {
+; CHECK-LABEL: @v4i1_10_11(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <4 x i1> <i1 true, i1 false, i1 false, i1 false>
+;
+entry:
+ %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 10, i32 11)
+ ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_12_11() {
+; CHECK-LABEL: @v4i1_12_11(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <4 x i1> zeroinitializer
+;
+entry:
+ %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 12, i32 11)
+ ret <4 x i1> %int
+}
+
+
+
+define <4 x i1> @v4i1_nc1(i32 %x) {
+; CHECK-LABEL: @v4i1_nc1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INT:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[X:%.*]], i32 11)
+; CHECK-NEXT: ret <4 x i1> [[INT]]
+;
+entry:
+ %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %x, i32 11)
+ ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_nc2(i32 %x) {
+; CHECK-LABEL: @v4i1_nc2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INT:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 11, i32 [[X:%.*]])
+; CHECK-NEXT: ret <4 x i1> [[INT]]
+;
+entry:
+ %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 11, i32 %x)
+ ret <4 x i1> %int
+}
+
+
+
+
+
+declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
+declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
+declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32)
More information about the llvm-branch-commits
mailing list