[PATCH] D16691: [InstCombine] simplify masked load intrinsics with constant masks
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 28 12:45:40 PST 2016
spatel created this revision.
spatel added reviewers: delena, igorb, RKSimon.
spatel added a subscriber: llvm-commits.
Herald added a subscriber: mcrosier.
A masked load with a zero mask means there's no load.
A masked load with an allOnes mask means it's a normal vector load.
I think something similar may be happening in CodeGenPrepare with D13855, but it doesn't trigger for a target that actually supports these ops (an x86 AVX target, for example), so we may be able to remove some of that logic. Doing these transforms in InstCombine is a better solution because it triggers sooner and allows more optimization by other passes.
Eventually, I think we should be able to replace the x86 intrinsics with the llvm IR intrinsics.
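To illustrate the two folds (a sketch with placeholder value names, not copied from the tests):

  ; Zero mask: no memory is touched, so uses of the call just become the passthru operand.
  %r = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %p, i32 1, <2 x i1> zeroinitializer, <2 x double> %pt)
    -->
  ; all uses of %r are replaced by %pt

  ; All-ones mask: every lane is loaded, so this is a plain vector load with the same alignment.
  %r = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %p, i32 1, <2 x i1> <i1 true, i1 true>, <2 x double> %pt)
    -->
  %unmaskedload = load <2 x double>, <2 x double>* %p, align 1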
http://reviews.llvm.org/D16691
Files:
lib/Transforms/InstCombine/InstCombineCalls.cpp
test/Transforms/InstCombine/masked_intrinsics.ll
Index: test/Transforms/InstCombine/masked_intrinsics.ll
===================================================================
--- test/Transforms/InstCombine/masked_intrinsics.ll
+++ test/Transforms/InstCombine/masked_intrinsics.ll
@@ -2,26 +2,26 @@
declare <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
-; FIXME: All of these could be simplified.
define <2 x double> @load_zeromask(<2 x double>* %ptr, <2 x double> %passthru) {
%res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptr, i32 1, <2 x i1> zeroinitializer, <2 x double> %passthru)
ret <2 x double> %res
; CHECK-LABEL: @load_zeromask(
-; CHECK-NEXT: %res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptr, i32 1, <2 x i1> zeroinitializer, <2 x double> %passthru)
-; CHECK-NEXT ret <2 x double> %res
+; CHECK-NEXT: ret <2 x double> %passthru
}
define <2 x double> @load_onemask(<2 x double>* %ptr, <2 x double> %passthru) {
%res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 1, i1 1>, <2 x double> %passthru)
ret <2 x double> %res
; CHECK-LABEL: @load_onemask(
-; CHECK-NEXT: %res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 true, i1 true>, <2 x double> %passthru)
+; CHECK-NEXT: %unmaskedload = load <2 x double>, <2 x double>* %ptr, align 2
-; CHECK-NEXT ret <2 x double> %res
+; CHECK-NEXT: ret <2 x double> %unmaskedload
}
+; FIXME: These could be simplified.
+
define <2 x double> @load_onesetbitmask1(<2 x double>* %ptr, <2 x double> %passthru) {
%res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptr, i32 3, <2 x i1> <i1 0, i1 1>, <2 x double> %passthru)
ret <2 x double> %res
Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -675,6 +675,40 @@
return nullptr;
}
+static Value *simplifyMaskedLoad(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ assert(II.getNumArgOperands() == 4 &&
+ "Wrong masked intrinsic format");
+ assert(isa<PointerType>(II.getArgOperand(0)->getType()) &&
+ "Wrong type for 1st arg");
+ assert(isa<ConstantInt>(II.getArgOperand(1)) &&
+ "Wrong type for 2nd arg");
+ assert(isa<VectorType>(II.getArgOperand(2)->getType()) &&
+ "Wrong type for 3rd arg");
+ assert(II.getArgOperand(3)->getType() == II.getType() &&
+ "Wrong type for 4th arg");
+
+ Value *LoadPtr = II.getArgOperand(0);
+ unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
+ Value *Mask = II.getArgOperand(2);
+ Value *PassThru = II.getArgOperand(3);
+
+ // If the mask is all zeros, the "passthru" argument is the result.
+ if (isa<ConstantAggregateZero>(Mask))
+ return PassThru;
+
+ // If the mask is all ones, this is a plain vector load of the first argument.
+ // Note that the mask type is <N x i1> which is not a ConstantDataVector.
+ if (auto *ConstMask = dyn_cast<ConstantVector>(Mask))
+ if (ConstMask->isAllOnesValue())
+ return Builder.CreateAlignedLoad(LoadPtr, Alignment, "unmaskedload");
+
+ // TODO: A mask with only one set bit can be reduced to a scalar load and
+ // insertelement into the passthru vector.
+
+ return nullptr;
+}
+
/// CallInst simplification. This mostly only handles folding of intrinsic
/// instructions. For normal calls, it allows visitCallSite to do the heavy
/// lifting.
@@ -799,6 +833,16 @@
break;
}
+ case Intrinsic::masked_load:
+ if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, *Builder))
+ return ReplaceInstUsesWith(CI, SimplifiedMaskedOp);
+ break;
+
+ // TODO: Handle the other masked ops.
+ // case Intrinsic::masked_store:
+ // case Intrinsic::masked_gather:
+ // case Intrinsic::masked_scatter:
+
case Intrinsic::powi:
if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
// powi(x, 0) -> 1.0