[PATCH] D126692: [InstCombine] Expand select+masked_load combine to include FP splats of -0.0
David Sherwood via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue May 31 05:20:54 PDT 2022
david-arm created this revision.
david-arm added reviewers: sdesmalen, kmclaughlin, CarolineConcatto, MattDevereau, aqjune.
Herald added a subscriber: hiraditya.
Herald added a project: All.
david-arm requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.
When tail-folding, the vectoriser sometimes generates a select instruction
that selects between data and a splat of -0.0. We already have a transform
in InstCombine that folds a select of a masked load and zeroinitializer
into a single masked load with a zeroinitializer passthru value. We can
do something similar for splats of -0.0 when the function has the
attribute "no-signed-zeros-fp-math" set to "true". In this case the sign
of a zero is not significant, so we can just replace the splat of -0.0
with zeroinitializer, i.e. a splat of +0.0.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D126692
Files:
llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
llvm/test/Transforms/InstCombine/select-masked_load.ll
Index: llvm/test/Transforms/InstCombine/select-masked_load.ll
===================================================================
--- llvm/test/Transforms/InstCombine/select-masked_load.ll
+++ llvm/test/Transforms/InstCombine/select-masked_load.ll
@@ -106,6 +106,27 @@
ret <8 x float> %1
}
+define <4 x float> @masked_load_and_minus_zero_inactive_1(<4 x float>* %ptr, <4 x i1> %mask) #0 {
+; CHECK-LABEL: @masked_load_and_minus_zero_inactive_1(
+; CHECK: %load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %ptr, i32 4, <4 x i1> %mask, <4 x float> zeroinitializer)
+; CHECK-NEXT: ret <4 x float> %load
+ %load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %ptr, i32 4, <4 x i1> %mask, <4 x float> poison)
+ %masked = select <4 x i1> %mask, <4 x float> %load, <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
+ ret <4 x float> %masked
+}
+
+define <vscale x 4 x float> @masked_load_and_minus_zero_inactive_2(<vscale x 4 x float>* %ptr, <vscale x 4 x i1> %mask) #0 {
+; CHECK-LABEL: @masked_load_and_minus_zero_inactive_2(
+; CHECK: %load = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* %ptr, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT: ret <vscale x 4 x float> %load
+ %load = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* %ptr, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
+ %masked = select <vscale x 4 x i1> %mask, <vscale x 4 x float> %load, <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
+ ret <vscale x 4 x float> %masked
+}
+
declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32 immarg, <8 x i1>, <8 x float>)
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>)
+declare <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>*, i32 immarg, <vscale x 4 x i1>, <vscale x 4 x float>)
+
+attributes #0 = { "no-signed-zeros-fp-math"="true" }
Index: llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/OverflowInstAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
@@ -2638,6 +2639,21 @@
return R;
}
+/// Return true if \p Val is zero, or is a splat of -0.0 inside a function \p F
+/// with "no-signed-zeros-fp-math"="true", where -0.0 may be treated as +0.0.
+static bool isEffectivelyZero(Function *F, Value *Val) {
+  if (match(Val, m_Zero()))
+    return true;
+
+  // The -0.0 case relies on a function attribute, so it needs a function.
+  if (!F)
+    return false;
+
+  auto *SplatFPVal = dyn_cast_or_null<ConstantFP>(llvm::getSplatValue(Val));
+  return SplatFPVal && SplatFPVal->isExactlyValue(-0.0) &&
+         F->getFnAttribute("no-signed-zeros-fp-math").getValueAsBool();
+}
+
Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
Value *CondVal = SI.getCondition();
Value *TrueVal = SI.getTrueValue();
@@ -3167,14 +3183,15 @@
// select(mask, mload(,,mask,0), 0) -> mload(,,mask,0)
// Load inst is intentionally not checked for hasOneUse()
- if (match(FalseVal, m_Zero()) &&
+ if (isEffectivelyZero(SI.getParent()->getParent(), FalseVal) &&
(match(TrueVal, m_MaskedLoad(m_Value(), m_Value(), m_Specific(CondVal),
m_CombineOr(m_Undef(), m_Zero()))) ||
match(TrueVal, m_MaskedGather(m_Value(), m_Value(), m_Specific(CondVal),
m_CombineOr(m_Undef(), m_Zero()))))) {
auto *MaskedInst = cast<IntrinsicInst>(TrueVal);
if (isa<UndefValue>(MaskedInst->getArgOperand(3)))
- MaskedInst->setArgOperand(3, FalseVal /* Zero */);
+ MaskedInst->setArgOperand(
+ 3, ConstantAggregateZero::get(FalseVal->getType()));
return replaceInstUsesWith(SI, MaskedInst);
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D126692.433056.patch
Type: text/x-patch
Size: 4326 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220531/c0fbbe2a/attachment.bin>
More information about the llvm-commits
mailing list