[PATCH] D60975: Convert a masked.gather of at most one element to a masked.load
Philip Reames via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 22 11:57:30 PDT 2019
reames created this revision.
reames added reviewers: craig.topper, spatel.
Herald added subscribers: llvm-commits, bollu, mcrosier.
Herald added a project: LLVM.
Split off of D59703 <https://reviews.llvm.org/D59703>. If a masked.gather has a constant mask in which at most one lane can be active, it is equivalent to a single-element masked.load from that lane's address, with the loaded value inserted back into the passthrough vector.
Repository:
rL LLVM
https://reviews.llvm.org/D60975
Files:
lib/Transforms/InstCombine/InstCombineCalls.cpp
test/Transforms/InstCombine/masked_intrinsics.ll
Index: test/Transforms/InstCombine/masked_intrinsics.ll
===================================================================
--- test/Transforms/InstCombine/masked_intrinsics.ll
+++ test/Transforms/InstCombine/masked_intrinsics.ll
@@ -206,8 +206,12 @@
; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <4 x i64> <i64 undef, i64 undef, i64 2, i64 undef>
; CHECK-NEXT: [[PT_V1:%.*]] = insertelement <4 x double> undef, double [[PT:%.*]], i64 0
; CHECK-NEXT: [[PT_V2:%.*]] = shufflevector <4 x double> [[PT_V1]], <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 undef, i32 0>
-; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> [[PTRS]], i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x double> [[PT_V2]])
-; CHECK-NEXT: ret <4 x double> [[RES]]
+; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x double*> [[PTRS]] to <4 x <1 x double>*>
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x <1 x double>*> [[BC]], i64 2
+; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <1 x double>, <1 x double>* [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <1 x double> [[UNMASKEDLOAD]], <1 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[PT_V2]], <4 x double> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
+; CHECK-NEXT: ret <4 x double> [[TMP3]]
;
%ptrs = getelementptr double, double *%base, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
%pt_v1 = insertelement <4 x double> undef, double %pt, i64 0
Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -1213,7 +1214,6 @@
}
// TODO, Obvious Missing Transforms:
-// * Single constant active lane load -> load
// * Dereferenceable address & few lanes -> scalarize speculative load/selects
// * Adjacent vector addresses -> masked.load
// * Narrow width by halfs excluding zero/undef lanes
@@ -1222,9 +1222,37 @@
static Instruction *simplifyMaskedGather(IntrinsicInst &II, InstCombiner &IC) {
// If the mask is all zeros, return the "passthru" argument of the gather.
auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
- if (ConstMask && ConstMask->isNullValue())
+ if (!ConstMask)
+ return nullptr;
+ if (ConstMask->isNullValue())
return IC.replaceInstUsesWith(II, II.getArgOperand(3));
+ APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
+ // If at most one lane can be active, emit a <1 x T> masked.load of that lane
+ // and insert its result into the passthru. If the lane is known active (not
+ // merely not known inactive), the masked.load combines make it a plain load.
+ if (DemandedElts.isPowerOf2()) {
+ // Note: APInt numbers its bits from LSB to MSB, so countTrailingZeros
+ // yields the index of the single vector lane whose bit is set.
+ unsigned Idx = DemandedElts.countTrailingZeros();
+ auto &B = IC.Builder;
+ auto *PtrLane = B.CreateExtractElement(II.getArgOperand(0), Idx);
+ unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
+ auto *MaskLane = B.CreateExtractElement(II.getArgOperand(2), Idx);
+ auto *PassThrough = II.getArgOperand(3);
+ auto *PTLane = B.CreateExtractElement(PassThrough, Idx);
+ // TODO: factor this single-lane masked.load construction into a helper.
+ auto *PTy = PointerType::get(VectorType::get(PTLane->getType(), 1),
+ PtrLane->getType()->getPointerAddressSpace());
+ auto *ML = B.CreateMaskedLoad(B.CreateBitCast(PtrLane, PTy),
+ Alignment,
+ B.CreateVectorSplat(1, MaskLane),
+ B.CreateVectorSplat(1, PTLane));
+ auto *E = B.CreateExtractElement(ML, (uint64_t)0);
+ auto *Res = B.CreateInsertElement(PassThrough, E, Idx);
+ return IC.replaceInstUsesWith(II, Res);
+ }
+
return nullptr;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D60975.196108.patch
Type: text/x-patch
Size: 4353 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190422/430eda8c/attachment.bin>
More information about the llvm-commits
mailing list