[PATCH] D115726: [InstCombine] Fold for masked gather when loading the same value each time.

Caroline via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 14 06:12:20 PST 2021


CarolineConcatto created this revision.
Herald added a subscriber: hiraditya.
CarolineConcatto requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

This patch checks in the masked gather when the first operand value is a
splat and the mask is all one, because the masked gather is reloading the
same value each time. This patch replaces this pattern of masked gather by
a scalar load of the value and splats it in a vector.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D115726

Files:
  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
  llvm/test/Transforms/InstCombine/vscale_masked_intrinsics.ll


Index: llvm/test/Transforms/InstCombine/vscale_masked_intrinsics.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/vscale_masked_intrinsics.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+define <vscale x 2 x i64> @valid_inv_load_i64(i64* %src) #0 {
+; CHECK-LABEL: @valid_inv_load_i64(
+; CHECK-NEXT:    [[LOAD_COMBINE:%.*]] = load i64, i64* [[SRC:%.*]], align 8
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[LOAD_COMBINE]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[BROADCAST_SPLAT2]]
+;
+  %broadcast.splatinsert = insertelement <vscale x 2 x i64*> poison, i64 *%src, i32 0
+  %broadcast.splat = shufflevector <vscale x 2 x i64*> %broadcast.splatinsert, <vscale x 2 x i64*> poison, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %broadcast.splat, i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> undef)
+  ret <vscale x 2 x i64> %res
+}
+
+;; Not a splat value
+define <vscale x 2 x i64> @invalid_value_inv_load_i64(i64* %src) #0 {
+; CHECK-LABEL: @invalid_value_inv_load_i64(
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64*> poison, i64* [[SRC:%.*]], i32 1
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64*> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64*> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    [[RES:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0i64(<vscale x 2 x i64*> [[BROADCAST_SPLAT]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> undef)
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[RES]]
+;
+  %broadcast.splatinsert = insertelement <vscale x 2 x i64*> poison, i64 *%src, i32 1
+  %broadcast.splat = shufflevector <vscale x 2 x i64*> %broadcast.splatinsert, <vscale x 2 x i64*> poison, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %broadcast.splat, i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> undef)
+  ret <vscale x 2 x i64> %res
+}
+
+;; Not all one mask
+define <vscale x 2 x i64> @invalid_mask_inv_load_i64(i64* %src) #0 {
+; CHECK-LABEL: @invalid_mask_inv_load_i64(
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64*> poison, i64* [[SRC:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64*> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64*> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    [[RES:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0i64(<vscale x 2 x i64*> [[BROADCAST_SPLAT]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 1), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> undef)
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[RES]]
+;
+  %broadcast.splatinsert = insertelement <vscale x 2 x i64*> poison, i64 *%src, i32 0
+  %broadcast.splat = shufflevector <vscale x 2 x i64*> %broadcast.splatinsert, <vscale x 2 x i64*> poison, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %broadcast.splat, i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 1), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> undef)
+  ret <vscale x 2 x i64> %res
+}
+
+
+; Function Attrs:
+declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+attributes #0 = { "target-features"="+sve,+sve" }
+
Index: llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -352,9 +352,26 @@
 // * Dereferenceable address & few lanes -> scalarize speculative load/selects
 // * Adjacent vector addresses -> masked.load
 // * Narrow width by halfs excluding zero/undef lanes
-// * Vector splat address w/known mask -> scalar load
 // * Vector incrementing address -> vector masked load
 Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
+  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
+  if (!ConstMask)
+    return nullptr;
+
+  // Vector splat address w/known mask -> scalar load
+  // Fold the gather to load the source vector first lane
+  // because it is reloading the same value each time
+  if (ConstMask->isAllOnesValue())
+    if (auto *Splat = getSplatValue(II.getArgOperand(0))) {
+      auto *VecTy = cast<VectorType>(II.getType());
+      const Align Alignment =
+          cast<ConstantInt>(II.getArgOperand(1))->getAlignValue();
+      LoadInst *L = Builder.CreateAlignedLoad(VecTy->getElementType(), Splat,
+                                              Alignment, "load.combine");
+      Value *Shuf =
+          Builder.CreateVectorSplat(VecTy->getElementCount(), L, "broadcast");
+      return replaceInstUsesWith(II, cast<Instruction>(Shuf));
+    }
   return nullptr;
 }
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D115726.394226.patch
Type: text/x-patch
Size: 5924 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20211214/e8756bd6/attachment.bin>


More information about the llvm-commits mailing list