[llvm] r367011 - [InstCombine] try to narrow a truncated load
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 25 05:14:27 PDT 2019
Author: spatel
Date: Thu Jul 25 05:14:27 2019
New Revision: 367011
URL: http://llvm.org/viewvc/llvm-project?rev=367011&view=rev
Log:
[InstCombine] try to narrow a truncated load
trunc (load X) --> load (bitcast X to narrow type)
We have this transform in DAGCombiner::ReduceLoadWidth(), but the truncated
load pattern can interfere with other instcombine transforms, so I'd like to
allow the fold sooner.
Example:
https://bugs.llvm.org/show_bug.cgi?id=16739
...in that report, we have bitcasts bracketing these ops, so those could get
eliminated too.
We've generally ruled out widening of loads early in IR ( LoadCombine -
http://lists.llvm.org/pipermail/llvm-dev/2016-September/105291.html ), but
that reasoning may not apply to narrowing if we can preserve information
such as the dereferenceable range.
Differential Revision: https://reviews.llvm.org/D64432
Modified:
llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp
llvm/trunk/test/Transforms/InstCombine/trunc-load.ll
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp?rev=367011&r1=367010&r2=367011&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp Thu Jul 25 05:14:27 2019
@@ -681,6 +681,42 @@ static Instruction *shrinkInsertElt(Cast
return nullptr;
}
+static Instruction *narrowLoad(TruncInst &Trunc,
+ InstCombiner::BuilderTy &Builder,
+ const DataLayout &DL) {
+ // Check the layout to ensure we are not creating an unsupported operation.
+ // TODO: Create a GEP to offset the load?
+ if (!DL.isLittleEndian())
+ return nullptr;
+ unsigned NarrowBitWidth = Trunc.getDestTy()->getPrimitiveSizeInBits();
+ if (!DL.isLegalInteger(NarrowBitWidth))
+ return nullptr;
+
+ // Match a truncated load with no other uses.
+ Value *X;
+ if (!match(Trunc.getOperand(0), m_OneUse(m_Load(m_Value(X)))))
+ return nullptr;
+ LoadInst *WideLoad = cast<LoadInst>(Trunc.getOperand(0));
+ if (!WideLoad->isSimple())
+ return nullptr;
+
+ // Don't narrow this load if we would lose information about the
+ // dereferenceable range.
+ bool CanBeNull;
+ uint64_t DerefBits = X->getPointerDereferenceableBytes(DL, CanBeNull) * 8;
+ if (DerefBits < WideLoad->getType()->getPrimitiveSizeInBits())
+ return nullptr;
+
+ // trunc (load X) --> load (bitcast X)
+ PointerType *PtrTy = PointerType::get(Trunc.getDestTy(),
+ WideLoad->getPointerAddressSpace());
+ Value *Bitcast = Builder.CreatePointerCast(X, PtrTy);
+ LoadInst *NarrowLoad = new LoadInst(Trunc.getDestTy(), Bitcast);
+ NarrowLoad->setAlignment(WideLoad->getAlignment());
+ copyMetadataForLoad(*NarrowLoad, *WideLoad);
+ return NarrowLoad;
+}
+
Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
if (Instruction *Result = commonCastTransforms(CI))
return Result;
@@ -840,6 +876,9 @@ Instruction *InstCombiner::visitTrunc(Tr
if (Instruction *I = foldVecTruncToExtElt(CI, *this))
return I;
+ if (Instruction *NewLoad = narrowLoad(CI, Builder, DL))
+ return NewLoad;
+
return nullptr;
}
Modified: llvm/trunk/test/Transforms/InstCombine/trunc-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/trunc-load.ll?rev=367011&r1=367010&r2=367011&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/trunc-load.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/trunc-load.ll Thu Jul 25 05:14:27 2019
@@ -29,10 +29,15 @@ define i32 @truncload_small_deref(i64* d
; On little-endian, we can narrow the load without an offset.
define i32 @truncload_deref(i64* dereferenceable(8) %ptr) {
-; CHECK-LABEL: @truncload_deref(
-; CHECK-NEXT: [[X:%.*]] = load i64, i64* [[PTR:%.*]], align 4
-; CHECK-NEXT: [[R:%.*]] = trunc i64 [[X]] to i32
-; CHECK-NEXT: ret i32 [[R]]
+; LE-LABEL: @truncload_deref(
+; LE-NEXT: [[TMP1:%.*]] = bitcast i64* [[PTR:%.*]] to i32*
+; LE-NEXT: [[R:%.*]] = load i32, i32* [[TMP1]], align 4
+; LE-NEXT: ret i32 [[R]]
+;
+; BE-LABEL: @truncload_deref(
+; BE-NEXT: [[X:%.*]] = load i64, i64* [[PTR:%.*]], align 4
+; BE-NEXT: [[R:%.*]] = trunc i64 [[X]] to i32
+; BE-NEXT: ret i32 [[R]]
;
%x = load i64, i64* %ptr
%r = trunc i64 %x to i32
@@ -42,10 +47,15 @@ define i32 @truncload_deref(i64* derefer
; Preserve alignment.
define i16 @truncload_align(i32* dereferenceable(14) %ptr) {
-; CHECK-LABEL: @truncload_align(
-; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[PTR:%.*]], align 16
-; CHECK-NEXT: [[R:%.*]] = trunc i32 [[X]] to i16
-; CHECK-NEXT: ret i16 [[R]]
+; LE-LABEL: @truncload_align(
+; LE-NEXT: [[TMP1:%.*]] = bitcast i32* [[PTR:%.*]] to i16*
+; LE-NEXT: [[R:%.*]] = load i16, i16* [[TMP1]], align 16
+; LE-NEXT: ret i16 [[R]]
+;
+; BE-LABEL: @truncload_align(
+; BE-NEXT: [[X:%.*]] = load i32, i32* [[PTR:%.*]], align 16
+; BE-NEXT: [[R:%.*]] = trunc i32 [[X]] to i16
+; BE-NEXT: ret i16 [[R]]
;
%x = load i32, i32* %ptr, align 16
%r = trunc i32 %x to i16
@@ -98,12 +108,40 @@ define i32 @truncload_volatile(i64* dere
; Preserve address space.
define i32 @truncload_address_space(i64 addrspace(1)* dereferenceable(8) %ptr) {
-; CHECK-LABEL: @truncload_address_space(
-; CHECK-NEXT: [[X:%.*]] = load i64, i64 addrspace(1)* [[PTR:%.*]], align 4
-; CHECK-NEXT: [[R:%.*]] = trunc i64 [[X]] to i32
-; CHECK-NEXT: ret i32 [[R]]
+; LE-LABEL: @truncload_address_space(
+; LE-NEXT: [[TMP1:%.*]] = bitcast i64 addrspace(1)* [[PTR:%.*]] to i32 addrspace(1)*
+; LE-NEXT: [[R:%.*]] = load i32, i32 addrspace(1)* [[TMP1]], align 4
+; LE-NEXT: ret i32 [[R]]
+;
+; BE-LABEL: @truncload_address_space(
+; BE-NEXT: [[X:%.*]] = load i64, i64 addrspace(1)* [[PTR:%.*]], align 4
+; BE-NEXT: [[R:%.*]] = trunc i64 [[X]] to i32
+; BE-NEXT: ret i32 [[R]]
;
%x = load i64, i64 addrspace(1)* %ptr, align 4
%r = trunc i64 %x to i32
ret i32 %r
}
+
+; Most metadata should be transferred to the narrow load.
+; TODO: We lost the range.
+
+define i32 @truncload_metadata(i64* dereferenceable(8) %ptr) {
+; LE-LABEL: @truncload_metadata(
+; LE-NEXT: [[TMP1:%.*]] = bitcast i64* [[PTR:%.*]] to i32*
+; LE-NEXT: [[R:%.*]] = load i32, i32* [[TMP1]], align 4, !invariant.load !0, !nontemporal !1
+; LE-NEXT: ret i32 [[R]]
+;
+; BE-LABEL: @truncload_metadata(
+; BE-NEXT: [[X:%.*]] = load i64, i64* [[PTR:%.*]], align 4, !range !0, !invariant.load !1, !nontemporal !2
+; BE-NEXT: [[R:%.*]] = trunc i64 [[X]] to i32
+; BE-NEXT: ret i32 [[R]]
+;
+ %x = load i64, i64* %ptr, align 4, !invariant.load !0, !nontemporal !1, !range !2
+ %r = trunc i64 %x to i32
+ ret i32 %r
+}
+
+!0 = !{}
+!1 = !{i32 1}
+!2 = !{i64 0, i64 2}
More information about the llvm-commits mailing list