[PATCH] D59703: Convert a masked.load of a dereferenceable address to an unconditional load
Philip Reames via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 22 11:32:59 PDT 2019
reames updated this revision to Diff 196105.
reames retitled this revision from "Optimize masked.loads and masked.gathers with a single active lane" to "Convert a masked.load of a dereferenceable address to an unconditional load".
reames edited the summary of this revision.
reames added a comment.
Split the patch. This review is now only the masked.load part.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D59703/new/
https://reviews.llvm.org/D59703
Files:
lib/Transforms/InstCombine/InstCombineCalls.cpp
test/Transforms/InstCombine/masked_intrinsics.ll
Index: test/Transforms/InstCombine/masked_intrinsics.ll
===================================================================
--- test/Transforms/InstCombine/masked_intrinsics.ll
+++ test/Transforms/InstCombine/masked_intrinsics.ll
@@ -87,8 +87,9 @@
; CHECK-LABEL: @load_speculative(
; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* nonnull [[PTR:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PTV2]])
-; CHECK-NEXT: ret <2 x double> [[RES]]
+; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[MASK:%.*]], <2 x double> [[UNMASKEDLOAD]], <2 x double> [[PTV2]]
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
double %pt, <2 x i1> %mask) {
%ptv1 = insertelement <2 x double> undef, double %pt, i64 0
Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -1165,19 +1166,29 @@
}
// TODO, Obvious Missing Transforms:
-// * Dereferenceable address -> speculative load/select
+// * Single active lane to scalar masked load
// * Narrow width by halfs excluding zero/undef lanes
static Value *simplifyMaskedLoad(const IntrinsicInst &II,
InstCombiner::BuilderTy &Builder) {
+ Value *LoadPtr = II.getArgOperand(0);
+ unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
+
// If the mask is all ones or undefs, this is a plain vector load of the 1st
// argument.
- if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
- Value *LoadPtr = II.getArgOperand(0);
- unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
+ if (maskIsAllOneOrUndef(II.getArgOperand(2)))
return Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
"unmaskedload");
- }
+ // If we can unconditionally load from this address, replace with a
+ // load/select idiom. TODO: use DT for context-sensitive query
+ if (isDereferenceableAndAlignedPointer(LoadPtr, Alignment,
+ II.getModule()->getDataLayout(),
+ &II, nullptr)) {
+ auto *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
+ "unmaskedload");
+ return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3));
+ }
+
return nullptr;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D59703.196105.patch
Type: text/x-patch
Size: 3091 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190422/7321933b/attachment.bin>
More information about the llvm-commits
mailing list