[llvm-branch-commits] [llvm] e185b1d - [ConstProp] Constant propagation for get.active.lane.mask intrinsics

David Green via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Fri Jan 8 08:15:01 PST 2021


Author: David Green
Date: 2021-01-08T16:10:01Z
New Revision: e185b1dd7b34c352167823295281f1bf1df09976

URL: https://github.com/llvm/llvm-project/commit/e185b1dd7b34c352167823295281f1bf1df09976
DIFF: https://github.com/llvm/llvm-project/commit/e185b1dd7b34c352167823295281f1bf1df09976.diff

LOG: [ConstProp] Constant propagation for get.active.lane.mask intrinsics

Similar to the Arm VCTP intrinsics, if both operands of a
get.active.lane.mask call are known constants, the constant lane mask
can be calculated. This can come up after loops have been unrolled.

Differential Revision: https://reviews.llvm.org/D94103

Added: 
    llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll

Modified: 
    llvm/lib/Analysis/ConstantFolding.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 7b0d4bd5172b..22b9acbc03b8 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1456,6 +1456,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::launder_invariant_group:
   case Intrinsic::strip_invariant_group:
   case Intrinsic::masked_load:
+  case Intrinsic::get_active_lane_mask:
   case Intrinsic::abs:
   case Intrinsic::smax:
   case Intrinsic::smin:
@@ -2927,6 +2928,25 @@ static Constant *ConstantFoldVectorCall(StringRef Name,
     }
     break;
   }
+  case Intrinsic::get_active_lane_mask: {
+    auto *Op0 = dyn_cast<ConstantInt>(Operands[0]);
+    auto *Op1 = dyn_cast<ConstantInt>(Operands[1]);
+    if (Op0 && Op1) {
+      unsigned Lanes = FVTy->getNumElements();
+      uint64_t Base = Op0->getZExtValue();
+      uint64_t Limit = Op1->getZExtValue();
+
+      SmallVector<Constant *, 16> NCs;
+      for (unsigned i = 0; i < Lanes; i++) {
+        if (Base + i < Limit)
+          NCs.push_back(ConstantInt::getTrue(Ty));
+        else
+          NCs.push_back(ConstantInt::getFalse(Ty));
+      }
+      return ConstantVector::get(NCs);
+    }
+    break;
+  }
   default:
     break;
   }

diff  --git a/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll b/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll
new file mode 100644
index 000000000000..a6006bca169c
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll
@@ -0,0 +1,300 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instsimplify -S -o - %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+
+define <16 x i1> @v16i1_0() {
+; CHECK-LABEL: @v16i1_0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> zeroinitializer
+;
+entry:
+  %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 0)
+  ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_1() {
+; CHECK-LABEL: @v16i1_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+  %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 1)
+  ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_8() {
+; CHECK-LABEL: @v16i1_8(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+  %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 8)
+  ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_15() {
+; CHECK-LABEL: @v16i1_15(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false>
+;
+entry:
+  %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 15)
+  ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_16() {
+; CHECK-LABEL: @v16i1_16(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 16)
+  ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_100() {
+; CHECK-LABEL: @v16i1_100(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 100)
+  ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_m1() {
+; CHECK-LABEL: @v16i1_m1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 -1)
+  ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_10_11() {
+; CHECK-LABEL: @v16i1_10_11(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+  %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 10, i32 11)
+  ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_12_11() {
+; CHECK-LABEL: @v16i1_12_11(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> zeroinitializer
+;
+entry:
+  %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 12, i32 11)
+  ret <16 x i1> %int
+}
+
+
+
+define <8 x i1> @v8i1_0() {
+; CHECK-LABEL: @v8i1_0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> zeroinitializer
+;
+entry:
+  %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 0)
+  ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_1() {
+; CHECK-LABEL: @v8i1_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+  %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 1)
+  ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_4() {
+; CHECK-LABEL: @v8i1_4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+  %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 4)
+  ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_7() {
+; CHECK-LABEL: @v8i1_7(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false>
+;
+entry:
+  %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 7)
+  ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_8() {
+; CHECK-LABEL: @v8i1_8(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 8)
+  ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_100() {
+; CHECK-LABEL: @v8i1_100(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 100)
+  ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_m1() {
+; CHECK-LABEL: @v8i1_m1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 -1)
+  ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_10_11() {
+; CHECK-LABEL: @v8i1_10_11(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+  %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 10, i32 11)
+  ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_12_11() {
+; CHECK-LABEL: @v8i1_12_11(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> zeroinitializer
+;
+entry:
+  %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 12, i32 11)
+  ret <8 x i1> %int
+}
+
+
+
+define <4 x i1> @v4i1_0() {
+; CHECK-LABEL: @v4i1_0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> zeroinitializer
+;
+entry:
+  %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 0)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_1() {
+; CHECK-LABEL: @v4i1_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 false, i1 false, i1 false>
+;
+entry:
+  %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 1)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_3() {
+; CHECK-LABEL: @v4i1_3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 false>
+;
+entry:
+  %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 3)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_4() {
+; CHECK-LABEL: @v4i1_4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 4)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_100() {
+; CHECK-LABEL: @v4i1_100(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 100)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_m1() {
+; CHECK-LABEL: @v4i1_m1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 -1)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_10_11() {
+; CHECK-LABEL: @v4i1_10_11(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 false, i1 false, i1 false>
+;
+entry:
+  %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 10, i32 11)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_12_11() {
+; CHECK-LABEL: @v4i1_12_11(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> zeroinitializer
+;
+entry:
+  %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 12, i32 11)
+  ret <4 x i1> %int
+}
+
+
+
+define <4 x i1> @v4i1_nc1(i32 %x) {
+; CHECK-LABEL: @v4i1_nc1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[INT:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[X:%.*]], i32 11)
+; CHECK-NEXT:    ret <4 x i1> [[INT]]
+;
+entry:
+  %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %x, i32 11)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_nc2(i32 %x) {
+; CHECK-LABEL: @v4i1_nc2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[INT:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 11, i32 [[X:%.*]])
+; CHECK-NEXT:    ret <4 x i1> [[INT]]
+;
+entry:
+  %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 11, i32 %x)
+  ret <4 x i1> %int
+}
+
+
+
+
+
+declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
+declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
+declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32)


        


More information about the llvm-branch-commits mailing list