[PATCH] D126692: [InstCombine] Expand select+masked_load combine to include FP splats of -0.0

Tue May 31 05:20:54 PDT 2022

david-arm created this revision.
david-arm added reviewers: sdesmalen, kmclaughlin, CarolineConcatto, MattDevereau, aqjune.
Herald added a subscriber: hiraditya.
Herald added a project: All.
david-arm requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

When tail-folding, the vectoriser sometimes generates a select instruction
that selects between data and a splat of -0.0. We already have a transform
in InstCombine that folds a select of a masked load and zeroinitializer
into a single masked load with a zeroinitializer passthru value. We can
do something similar for splats of -0.0 when the function has the
attribute no-signed-zeros-fp-math set to true. In this case the sign of
zero is not significant, so we can simply replace the splat of -0.0 with
zeroinitializer, i.e. a splat of 0.0, and then apply the existing fold.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D126692

Files:
  llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
  llvm/test/Transforms/InstCombine/select-masked_load.ll


Index: llvm/test/Transforms/InstCombine/select-masked_load.ll
===================================================================

--- llvm/test/Transforms/InstCombine/select-masked_load.ll
+++ llvm/test/Transforms/InstCombine/select-masked_load.ll
@@ -106,6 +106,27 @@
   ret <8 x float> %1
 }
 
+define <4 x float> @masked_load_and_minus_zero_inactive_1(<4 x float>* %ptr, <4 x i1> %mask) #0 {
+; CHECK-LABEL: @masked_load_and_minus_zero_inactive_1(
+; CHECK: %load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %ptr, i32 4, <4 x i1> %mask, <4 x float> zeroinitializer)
+; CHECK-NEXT: ret <4 x float> %load
+  %load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %ptr, i32 4, <4 x i1> %mask, <4 x float> poison)
+  %masked = select <4 x i1> %mask, <4 x float> %load, <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
+  ret <4 x float> %masked
+}
+
+define <vscale x 4 x float> @masked_load_and_minus_zero_inactive_2(<vscale x 4 x float>* %ptr, <vscale x 4 x i1> %mask) #0 {
+; CHECK-LABEL: @masked_load_and_minus_zero_inactive_2(
+; CHECK: %load = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* %ptr, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT: ret <vscale x 4 x float> %load
+  %load = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* %ptr, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
+  %masked = select <vscale x 4 x i1> %mask, <vscale x 4 x float> %load, <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
+  ret <vscale x 4 x float> %masked
+}
+
 declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32 immarg, <8 x i1>, <8 x float>)
 declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
 declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>)
+declare <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>*, i32 immarg, <vscale x 4 x i1>, <vscale x 4 x float>)
+
+attributes #0 = { "no-signed-zeros-fp-math"="true" }
Index: llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -20,6 +20,7 @@
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/OverflowInstAnalysis.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/ConstantRange.h"
@@ -2638,6 +2639,21 @@
   return R;
 }
 
+/// Return true if \p Val is zero, or is an FP splat of -0.0 while \p F has
+/// "no-signed-zeros-fp-math"="true" (so -0.0 may be treated as +0.0).
+static bool isEffectivelyZero(Function *F, Value *Val) {
+  if (match(Val, m_Zero()))
+    return true;
+  if (!F)
+    return false;
+
+  // getSplatValue() may return null (no splat), hence dyn_cast_or_null.
+  auto *SplatFPVal = dyn_cast_or_null<ConstantFP>(llvm::getSplatValue(Val));
+  if (!SplatFPVal || !SplatFPVal->isExactlyValue(-0.0))
+    return false;
+  return F->getFnAttribute("no-signed-zeros-fp-math").getValueAsBool();
+}
+
 Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
   Value *CondVal = SI.getCondition();
   Value *TrueVal = SI.getTrueValue();
@@ -3167,14 +3183,15 @@
 
   // select(mask, mload(,,mask,0), 0) -> mload(,,mask,0)
   // Load inst is intentionally not checked for hasOneUse()
-  if (match(FalseVal, m_Zero()) &&
+  if (isEffectivelyZero(SI.getParent()->getParent(), FalseVal) &&
       (match(TrueVal, m_MaskedLoad(m_Value(), m_Value(), m_Specific(CondVal),
                                    m_CombineOr(m_Undef(), m_Zero()))) ||
        match(TrueVal, m_MaskedGather(m_Value(), m_Value(), m_Specific(CondVal),
                                      m_CombineOr(m_Undef(), m_Zero()))))) {
     auto *MaskedInst = cast<IntrinsicInst>(TrueVal);
     if (isa<UndefValue>(MaskedInst->getArgOperand(3)))
-      MaskedInst->setArgOperand(3, FalseVal /* Zero */);
+      MaskedInst->setArgOperand(
+          3, ConstantAggregateZero::get(FalseVal->getType()));
     return replaceInstUsesWith(SI, MaskedInst);
   }
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D126692.433056.patch
Type: text/x-patch
Size: 4326 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220531/c0fbbe2a/attachment.bin>