[llvm] r320989 - [Memcpy Loop Lowering] Remove the fixed int8 lowering.
Sean Fertile via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 18 07:31:14 PST 2017
Author: sfertile
Date: Mon Dec 18 07:31:14 2017
New Revision: 320989
URL: http://llvm.org/viewvc/llvm-project?rev=320989&view=rev
Log:
[Memcpy Loop Lowering] Remove the fixed int8 lowering.
Switch over to the lowering that uses target-supplied operand types.
Differential Revision: https://reviews.llvm.org/D41201
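For readers skimming the diff below, this is the net effect on the lowering entry point written as straight-line code rather than a diff. It is a sketch assembled from the changed lines in LowerMemIntrinsics.cpp, not a verbatim excerpt: expandMemCpyAsLoop no longer has a fixed-int8 path and always dispatches to the TTI-aware helpers, choosing the known-size or unknown-size expansion based on whether the copy length is a compile-time constant.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
using namespace llvm;

// Sketch of the post-patch dispatch in expandMemCpyAsLoop: both paths take
// the TargetTransformInfo so the target can pick the loop operand types.
static void expandMemCpyAsLoopSketch(MemCpyInst *Memcpy,
                                     const TargetTransformInfo &TTI) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
    // Compile-time-constant length: counted loop in the target-chosen width,
    // plus residual operations for any leftover bytes.
    createMemCpyLoopKnownSize(/* InsertBefore */ Memcpy,
                              Memcpy->getRawSource(), Memcpy->getRawDest(),
                              /* CopyLen */ CI,
                              Memcpy->getAlignment(), Memcpy->getAlignment(),
                              Memcpy->isVolatile(), Memcpy->isVolatile(), TTI);
  } else {
    // Runtime length: loop guarded by a zero-length check.
    createMemCpyLoopUnknownSize(/* InsertBefore */ Memcpy,
                                Memcpy->getRawSource(), Memcpy->getRawDest(),
                                /* CopyLen */ Memcpy->getLength(),
                                Memcpy->getAlignment(), Memcpy->getAlignment(),
                                Memcpy->isVolatile(), Memcpy->isVolatile(), TTI);
  }
}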
Modified:
llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
llvm/trunk/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
llvm/trunk/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
llvm/trunk/lib/Transforms/Utils/LowerMemIntrinsics.cpp
llvm/trunk/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll
llvm/trunk/test/CodeGen/NVPTX/lower-aggr-copies.ll
Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h?rev=320989&r1=320988&r2=320989&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h Mon Dec 18 07:31:14 2017
@@ -862,12 +862,6 @@ public:
unsigned SrcAlign,
unsigned DestAlign) const;
- /// \returns True if we want to test the new memcpy lowering functionality in
- /// Transform/Utils.
- /// Temporary. Will be removed once we move to the new functionality and
- /// remove the old.
- bool useWideIRMemcpyLoopLowering() const;
-
/// \returns True if the two functions have compatible attributes for inlining
/// purposes.
bool areInlineCompatible(const Function *Caller,
Modified: llvm/trunk/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils/LowerMemIntrinsics.h?rev=320989&r1=320988&r2=320989&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/LowerMemIntrinsics.h (original)
+++ llvm/trunk/include/llvm/Transforms/Utils/LowerMemIntrinsics.h Mon Dec 18 07:31:14 2017
@@ -25,12 +25,6 @@ class MemSetInst;
class TargetTransformInfo;
class Value;
-/// Emit a loop implementing the semantics of llvm.memcpy with the equivalent
-/// arguments at \p InsertBefore.
-void createMemCpyLoop(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr,
- Value *CopyLen, unsigned SrcAlign, unsigned DestAlign,
- bool SrcIsVolatile, bool DstIsVolatile);
-
/// Emit a loop implementing the semantics of llvm.memcpy where the size is not
a compile-time constant. The loop will be inserted at \p InsertBefore.
void createMemCpyLoopUnknownSize(Instruction *InsertBefore, Value *SrcAddr,
Modified: llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/TargetTransformInfo.cpp?rev=320989&r1=320988&r2=320989&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Analysis/TargetTransformInfo.cpp Mon Dec 18 07:31:14 2017
@@ -26,11 +26,6 @@ using namespace PatternMatch;
#define DEBUG_TYPE "tti"
-static cl::opt<bool> UseWideMemcpyLoopLowering(
- "use-wide-memcpy-loop-lowering", cl::init(false),
- cl::desc("Enables the new wide memcpy loop lowering in Transforms/Utils."),
- cl::Hidden);
-
static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
cl::Hidden,
cl::desc("Recognize reduction patterns."));
@@ -547,10 +542,6 @@ void TargetTransformInfo::getMemcpyLoopR
SrcAlign, DestAlign);
}
-bool TargetTransformInfo::useWideIRMemcpyLoopLowering() const {
- return UseWideMemcpyLoopLowering;
-}
-
bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
const Function *Callee) const {
return TTIImpl->areInlineCompatible(Caller, Callee);
Modified: llvm/trunk/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp?rev=320989&r1=320988&r2=320989&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp Mon Dec 18 07:31:14 2017
@@ -111,23 +111,13 @@ bool NVPTXLowerAggrCopies::runOnFunction
ConstantInt *CopyLen =
ConstantInt::get(Type::getInt32Ty(Context), NumLoads);
- if (!TTI.useWideIRMemcpyLoopLowering()) {
- createMemCpyLoop(/* ConvertedInst */ SI,
- /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr,
- /* CopyLen */ CopyLen,
- /* SrcAlign */ LI->getAlignment(),
- /* DestAlign */ SI->getAlignment(),
- /* SrcIsVolatile */ LI->isVolatile(),
- /* DstIsVolatile */ SI->isVolatile());
- } else {
- createMemCpyLoopKnownSize(/* ConvertedInst */ SI,
- /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr,
- /* CopyLen */ CopyLen,
- /* SrcAlign */ LI->getAlignment(),
- /* DestAlign */ SI->getAlignment(),
- /* SrcIsVolatile */ LI->isVolatile(),
- /* DstIsVolatile */ SI->isVolatile(), TTI);
- }
+ createMemCpyLoopKnownSize(/* ConvertedInst */ SI,
+ /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr,
+ /* CopyLen */ CopyLen,
+ /* SrcAlign */ LI->getAlignment(),
+ /* DestAlign */ SI->getAlignment(),
+ /* SrcIsVolatile */ LI->isVolatile(),
+ /* DstIsVolatile */ SI->isVolatile(), TTI);
SI->eraseFromParent();
LI->eraseFromParent();
Modified: llvm/trunk/lib/Transforms/Utils/LowerMemIntrinsics.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/LowerMemIntrinsics.cpp?rev=320989&r1=320988&r2=320989&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/LowerMemIntrinsics.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/LowerMemIntrinsics.cpp Mon Dec 18 07:31:14 2017
@@ -263,61 +263,6 @@ void llvm::createMemCpyLoopUnknownSize(I
}
}
-void llvm::createMemCpyLoop(Instruction *InsertBefore,
- Value *SrcAddr, Value *DstAddr, Value *CopyLen,
- unsigned SrcAlign, unsigned DestAlign,
- bool SrcIsVolatile, bool DstIsVolatile) {
- Type *TypeOfCopyLen = CopyLen->getType();
-
- BasicBlock *OrigBB = InsertBefore->getParent();
- Function *F = OrigBB->getParent();
- BasicBlock *NewBB =
- InsertBefore->getParent()->splitBasicBlock(InsertBefore, "split");
- BasicBlock *LoopBB = BasicBlock::Create(F->getContext(), "loadstoreloop",
- F, NewBB);
-
- IRBuilder<> Builder(OrigBB->getTerminator());
-
- // SrcAddr and DstAddr are expected to be pointer types,
- // so no check is made here.
- unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
- unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
-
- // Cast pointers to (char *)
- SrcAddr = Builder.CreateBitCast(SrcAddr, Builder.getInt8PtrTy(SrcAS));
- DstAddr = Builder.CreateBitCast(DstAddr, Builder.getInt8PtrTy(DstAS));
-
- Builder.CreateCondBr(
- Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
- LoopBB);
- OrigBB->getTerminator()->eraseFromParent();
-
- IRBuilder<> LoopBuilder(LoopBB);
- PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
- LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);
-
- // load from SrcAddr+LoopIndex
- // TODO: we can leverage the align parameter of llvm.memcpy for more efficient
- // word-sized loads and stores.
- Value *Element =
- LoopBuilder.CreateLoad(LoopBuilder.CreateInBoundsGEP(
- LoopBuilder.getInt8Ty(), SrcAddr, LoopIndex),
- SrcIsVolatile);
- // store at DstAddr+LoopIndex
- LoopBuilder.CreateStore(Element,
- LoopBuilder.CreateInBoundsGEP(LoopBuilder.getInt8Ty(),
- DstAddr, LoopIndex),
- DstIsVolatile);
-
- // The value for LoopIndex coming from backedge is (LoopIndex + 1)
- Value *NewIndex =
- LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
- LoopIndex->addIncoming(NewIndex, LoopBB);
-
- LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
- NewBB);
-}
-
// Lower memmove to IR. memmove is required to correctly copy overlapping memory
// regions; therefore, it has to check the relative positions of the source and
// destination pointers and choose the copy direction accordingly.
@@ -459,38 +404,26 @@ static void createMemSetLoop(Instruction
void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
const TargetTransformInfo &TTI) {
- // Original implementation
- if (!TTI.useWideIRMemcpyLoopLowering()) {
- createMemCpyLoop(/* InsertBefore */ Memcpy,
- /* SrcAddr */ Memcpy->getRawSource(),
- /* DstAddr */ Memcpy->getRawDest(),
- /* CopyLen */ Memcpy->getLength(),
- /* SrcAlign */ Memcpy->getAlignment(),
- /* DestAlign */ Memcpy->getAlignment(),
- /* SrcIsVolatile */ Memcpy->isVolatile(),
- /* DstIsVolatile */ Memcpy->isVolatile());
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
+ createMemCpyLoopKnownSize(/* InsertBefore */ Memcpy,
+ /* SrcAddr */ Memcpy->getRawSource(),
+ /* DstAddr */ Memcpy->getRawDest(),
+ /* CopyLen */ CI,
+ /* SrcAlign */ Memcpy->getAlignment(),
+ /* DestAlign */ Memcpy->getAlignment(),
+ /* SrcIsVolatile */ Memcpy->isVolatile(),
+ /* DstIsVolatile */ Memcpy->isVolatile(),
+ /* TargetTransformInfo */ TTI);
} else {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
- createMemCpyLoopKnownSize(/* InsertBefore */ Memcpy,
+ createMemCpyLoopUnknownSize(/* InsertBefore */ Memcpy,
/* SrcAddr */ Memcpy->getRawSource(),
/* DstAddr */ Memcpy->getRawDest(),
- /* CopyLen */ CI,
+ /* CopyLen */ Memcpy->getLength(),
/* SrcAlign */ Memcpy->getAlignment(),
/* DestAlign */ Memcpy->getAlignment(),
/* SrcIsVolatile */ Memcpy->isVolatile(),
/* DstIsVolatile */ Memcpy->isVolatile(),
- /* TargetTransformInfo */ TTI);
- } else {
- createMemCpyLoopUnknownSize(/* InsertBefore */ Memcpy,
- /* SrcAddr */ Memcpy->getRawSource(),
- /* DstAddr */ Memcpy->getRawDest(),
- /* CopyLen */ Memcpy->getLength(),
- /* SrcAlign */ Memcpy->getAlignment(),
- /* DestAlign */ Memcpy->getAlignment(),
- /* SrcIsVolatile */ Memcpy->isVolatile(),
- /* DstIsVolatile */ Memcpy->isVolatile(),
- /* TargetTransfomrInfo */ TTI);
- }
+                                /* TargetTransformInfo */ TTI);
}
}
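The "target-supplied operand types" mentioned in the log come from the TargetTransformInfo hooks that createMemCpyLoopKnownSize / createMemCpyLoopUnknownSize query (getMemcpyLoopLoweringType and getMemcpyLoopResidualLoweringTypes; a definition of the latter is visible in the TargetTransformInfo.cpp hunk above). Below is a minimal sketch of the kind of policy a target can encode there. It is written as free functions because the exact hook signatures are not reproduced in this mail; the helper names and the 4-byte widening threshold are illustrative assumptions, not part of this patch.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
using namespace llvm;

// Illustrative policy: copy in 4-byte chunks when both operands are at
// least 4-byte aligned, otherwise stay with byte-sized operations.
static Type *pickMemcpyLoopOperandType(LLVMContext &Ctx, unsigned SrcAlign,
                                       unsigned DestAlign) {
  if (SrcAlign >= 4 && DestAlign >= 4)
    return Type::getInt32Ty(Ctx);
  return Type::getInt8Ty(Ctx);
}

// Whatever the widened loop cannot cover (the trailing CopyLen % 4 bytes
// under the policy above) is copied with byte-sized loads and stores.
static void pickMemcpyResidualTypes(SmallVectorImpl<Type *> &OpsOut,
                                    LLVMContext &Ctx,
                                    unsigned RemainingBytes) {
  for (unsigned I = 0; I != RemainingBytes; ++I)
    OpsOut.push_back(Type::getInt8Ty(Ctx));
}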
Modified: llvm/trunk/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll?rev=320989&r1=320988&r2=320989&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll Mon Dec 18 07:31:14 2017
@@ -1,5 +1,4 @@
; RUN: opt -S -amdgpu-lower-intrinsics %s | FileCheck -check-prefix=OPT %s
-; RUN: opt -S -amdgpu-lower-intrinsics -use-wide-memcpy-loop-lowering=true %s | FileCheck -check-prefix=WOPT %s
declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i32, i1) #1
declare void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i32, i1) #1
@@ -18,21 +17,14 @@ define amdgpu_kernel void @max_size_smal
; Smallest static size which will be expanded
; OPT-LABEL: @min_size_large_static_memcpy_caller0(
; OPT-NOT: call
-; OPT: getelementptr
-; OPT-NEXT: load i8
-; OPT: getelementptr
-; OPT-NEXT: store i8
-
-; WOPT-LABEL: @min_size_large_static_memcpy_caller0(
-; WOPT-NOT: call
-; WOPT: br label %load-store-loop
-; WOPT: [[T1:%[0-9]+]] = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %loop-index
-; WOPT-NEXT: [[T2:%[0-9]+]] = load i8, i8 addrspace(1)* [[T1]]
-; WOPT-NEXT: [[T3:%[0-9]+]] = getelementptr inbounds i8, i8 addrspace(1)* %dst, i64 %loop-index
-; WOPT-NEXT: store i8 [[T2]], i8 addrspace(1)* [[T3]]
-; WOPT-NEXT: [[T4:%[0-9]+]] = add i64 %loop-index, 1
-; WOPT-NEXT: [[T5:%[0-9]+]] = icmp ult i64 [[T4]], 1025
-; WOPT-NEXT: br i1 [[T5]], label %load-store-loop, label %memcpy-split
+; OPT: br label %load-store-loop
+; OPT: [[T1:%[0-9]+]] = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %loop-index
+; OPT-NEXT: [[T2:%[0-9]+]] = load i8, i8 addrspace(1)* [[T1]]
+; OPT-NEXT: [[T3:%[0-9]+]] = getelementptr inbounds i8, i8 addrspace(1)* %dst, i64 %loop-index
+; OPT-NEXT: store i8 [[T2]], i8 addrspace(1)* [[T3]]
+; OPT-NEXT: [[T4:%[0-9]+]] = add i64 %loop-index, 1
+; OPT-NEXT: [[T5:%[0-9]+]] = icmp ult i64 [[T4]], 1025
+; OPT-NEXT: br i1 [[T5]], label %load-store-loop, label %memcpy-split
define amdgpu_kernel void @min_size_large_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i32 1, i1 false)
ret void
Modified: llvm/trunk/test/CodeGen/NVPTX/lower-aggr-copies.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/lower-aggr-copies.ll?rev=320989&r1=320988&r2=320989&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/lower-aggr-copies.ll (original)
+++ llvm/trunk/test/CodeGen/NVPTX/lower-aggr-copies.ll Mon Dec 18 07:31:14 2017
@@ -1,6 +1,5 @@
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -O0 | FileCheck %s --check-prefix PTX
; RUN: opt < %s -S -nvptx-lower-aggr-copies | FileCheck %s --check-prefix IR
-; RUN: opt < %s -S -nvptx-lower-aggr-copies -use-wide-memcpy-loop-lowering=true | FileCheck %s --check-prefix WIR
; Verify that the NVPTXLowerAggrCopies pass works as expected - calls to
; llvm.mem* intrinsics get lowered to loops.
@@ -18,13 +17,22 @@ entry:
ret i8* %dst
; IR-LABEL: @memcpy_caller
-; IR: [[CMPREG:%[0-9]+]] = icmp eq i64 0, %n
-; IR: br i1 [[CMPREG]], label %split, label %loadstoreloop
-; IR: loadstoreloop:
-; IR: [[LOADPTR:%[0-9]+]] = getelementptr inbounds i8, i8* %src, i64
-; IR-NEXT: [[VAL:%[0-9]+]] = load i8, i8* [[LOADPTR]]
-; IR-NEXT: [[STOREPTR:%[0-9]+]] = getelementptr inbounds i8, i8* %dst, i64
-; IR-NEXT: store i8 [[VAL]], i8* [[STOREPTR]]
+; IR: entry:
+; IR: [[Cond:%[0-9]+]] = icmp ne i64 %n, 0
+; IR: br i1 [[Cond]], label %loop-memcpy-expansion, label %post-loop-memcpy-expansion
+
+; IR: loop-memcpy-expansion:
+; IR: %loop-index = phi i64 [ 0, %entry ], [ [[IndexInc:%[0-9]+]], %loop-memcpy-expansion ]
+; IR: [[SrcGep:%[0-9]+]] = getelementptr inbounds i8, i8* %src, i64 %loop-index
+; IR: [[Load:%[0-9]+]] = load i8, i8* [[SrcGep]]
+; IR: [[DstGep:%[0-9]+]] = getelementptr inbounds i8, i8* %dst, i64 %loop-index
+; IR: store i8 [[Load]], i8* [[DstGep]]
+; IR: [[IndexInc]] = add i64 %loop-index, 1
+; IR: [[Cond2:%[0-9]+]] = icmp ult i64 [[IndexInc]], %n
+; IR: br i1 [[Cond2]], label %loop-memcpy-expansion, label %post-loop-memcpy-expansion
+
+; IR-LABEL: post-loop-memcpy-expansion:
+; IR: ret i8* %dst
; PTX-LABEL: .visible .func (.param .b64 func_retval0) memcpy_caller
; PTX: LBB[[LABEL:[_0-9]+]]:
@@ -34,23 +42,6 @@ entry:
; PTX: setp.lt.u64 %p[[PRED:[0-9]+]], %rd[[COUNTER]], %rd
; PTX: @%p[[PRED]] bra LBB[[LABEL]]
-; WIR-LABEL: @memcpy_caller
-; WIR: entry:
-; WIR: [[Cond:%[0-9]+]] = icmp ne i64 %n, 0
-; WIR: br i1 [[Cond]], label %loop-memcpy-expansion, label %post-loop-memcpy-expansion
-
-; WIR: loop-memcpy-expansion:
-; WIR: %loop-index = phi i64 [ 0, %entry ], [ [[IndexInc:%[0-9]+]], %loop-memcpy-expansion ]
-; WIR: [[SrcGep:%[0-9]+]] = getelementptr inbounds i8, i8* %src, i64 %loop-index
-; WIR: [[Load:%[0-9]+]] = load i8, i8* [[SrcGep]]
-; WIR: [[DstGep:%[0-9]+]] = getelementptr inbounds i8, i8* %dst, i64 %loop-index
-; WIR: store i8 [[Load]], i8* [[DstGep]]
-; WIR: [[IndexInc]] = add i64 %loop-index, 1
-; WIR: [[Cond2:%[0-9]+]] = icmp ult i64 [[IndexInc]], %n
-; WIR: br i1 [[Cond2]], label %loop-memcpy-expansion, label %post-loop-memcpy-expansion
-
-; WIR-LABEL: post-loop-memcpy-expansion:
-; WIR: ret i8* %dst
}
define i8* @memcpy_volatile_caller(i8* %dst, i8* %src, i64 %n) #0 {
@@ -59,8 +50,23 @@ entry:
ret i8* %dst
; IR-LABEL: @memcpy_volatile_caller
-; IR: load volatile
-; IR: store volatile
+; IR: entry:
+; IR: [[Cond:%[0-9]+]] = icmp ne i64 %n, 0
+; IR: br i1 [[Cond]], label %loop-memcpy-expansion, label %post-loop-memcpy-expansion
+
+; IR: loop-memcpy-expansion:
+; IR: %loop-index = phi i64 [ 0, %entry ], [ [[IndexInc:%[0-9]+]], %loop-memcpy-expansion ]
+; IR: [[SrcGep:%[0-9]+]] = getelementptr inbounds i8, i8* %src, i64 %loop-index
+; IR: [[Load:%[0-9]+]] = load volatile i8, i8* [[SrcGep]]
+; IR: [[DstGep:%[0-9]+]] = getelementptr inbounds i8, i8* %dst, i64 %loop-index
+; IR: store volatile i8 [[Load]], i8* [[DstGep]]
+; IR: [[IndexInc]] = add i64 %loop-index, 1
+; IR: [[Cond2:%[0-9]+]] = icmp ult i64 [[IndexInc]], %n
+; IR: br i1 [[Cond2]], label %loop-memcpy-expansion, label %post-loop-memcpy-expansion
+
+; IR-LABEL: post-loop-memcpy-expansion:
+; IR: ret i8* %dst
+
; PTX-LABEL: .visible .func (.param .b64 func_retval0) memcpy_volatile_caller
; PTX: LBB[[LABEL:[_0-9]+]]:
@@ -69,24 +75,6 @@ entry:
; PTX: add.s64 %rd[[COUNTER:[0-9]+]], %rd{{[0-9]+}}, 1
; PTX: setp.lt.u64 %p[[PRED:[0-9]+]], %rd[[COUNTER]], %rd
; PTX: @%p[[PRED]] bra LBB[[LABEL]]
-
-; WIR-LABEL: @memcpy_volatile_caller
-; WIR: entry:
-; WIR: [[Cond:%[0-9]+]] = icmp ne i64 %n, 0
-; WIR: br i1 [[Cond]], label %loop-memcpy-expansion, label %post-loop-memcpy-expansion
-
-; WIR: loop-memcpy-expansion:
-; WIR: %loop-index = phi i64 [ 0, %entry ], [ [[IndexInc:%[0-9]+]], %loop-memcpy-expansion ]
-; WIR: [[SrcGep:%[0-9]+]] = getelementptr inbounds i8, i8* %src, i64 %loop-index
-; WIR: [[Load:%[0-9]+]] = load volatile i8, i8* [[SrcGep]]
-; WIR: [[DstGep:%[0-9]+]] = getelementptr inbounds i8, i8* %dst, i64 %loop-index
-; WIR: store volatile i8 [[Load]], i8* [[DstGep]]
-; WIR: [[IndexInc]] = add i64 %loop-index, 1
-; WIR: [[Cond2:%[0-9]+]] = icmp ult i64 [[IndexInc]], %n
-; WIR: br i1 [[Cond2]], label %loop-memcpy-expansion, label %post-loop-memcpy-expansion
-
-; WIR-LABEL: post-loop-memcpy-expansion:
-; WIR: ret i8* %dst
}
define i8* @memcpy_casting_caller(i32* %dst, i32* %src, i64 %n) #0 {
@@ -102,12 +90,6 @@ entry:
; IR: [[SRCCAST:%[0-9]+]] = bitcast i32* %src to i8*
; IR: getelementptr inbounds i8, i8* [[SRCCAST]]
; IR: getelementptr inbounds i8, i8* [[DSTCAST]]
-
-; WIR-LABEL: @memcpy_casting_caller
-; WIR: [[DSTCAST:%[0-9]+]] = bitcast i32* %dst to i8*
-; WIR: [[SRCCAST:%[0-9]+]] = bitcast i32* %src to i8*
-; WIR: getelementptr inbounds i8, i8* [[SRCCAST]]
-; WIR: getelementptr inbounds i8, i8* [[DSTCAST]]
}
define i8* @memcpy_known_size(i8* %dst, i8* %src) {
@@ -116,18 +98,18 @@ entry:
ret i8* %dst
; Check that calls with compile-time constant size are handled correctly
-; WIR-LABEL: @memcpy_known_size
-; WIR: entry:
-; WIR: br label %load-store-loop
-; WIR: load-store-loop:
-; WIR: %loop-index = phi i64 [ 0, %entry ], [ [[IndexInc:%[0-9]+]], %load-store-loop ]
-; WIR: [[SrcGep:%[0-9]+]] = getelementptr inbounds i8, i8* %src, i64 %loop-index
-; WIR: [[Load:%[0-9]+]] = load i8, i8* [[SrcGep]]
-; WIR: [[DstGep:%[0-9]+]] = getelementptr inbounds i8, i8* %dst, i64 %loop-index
-; WIR: store i8 [[Load]], i8* [[DstGep]]
-; WIR: [[IndexInc]] = add i64 %loop-index, 1
-; WIR: [[Cond:%[0-9]+]] = icmp ult i64 %3, 144
-; WIR: br i1 [[Cond]], label %load-store-loop, label %memcpy-split
+; IR-LABEL: @memcpy_known_size
+; IR: entry:
+; IR: br label %load-store-loop
+; IR: load-store-loop:
+; IR: %loop-index = phi i64 [ 0, %entry ], [ [[IndexInc:%[0-9]+]], %load-store-loop ]
+; IR: [[SrcGep:%[0-9]+]] = getelementptr inbounds i8, i8* %src, i64 %loop-index
+; IR: [[Load:%[0-9]+]] = load i8, i8* [[SrcGep]]
+; IR: [[DstGep:%[0-9]+]] = getelementptr inbounds i8, i8* %dst, i64 %loop-index
+; IR: store i8 [[Load]], i8* [[DstGep]]
+; IR: [[IndexInc]] = add i64 %loop-index, 1
+; IR: [[Cond:%[0-9]+]] = icmp ult i64 %3, 144
+; IR: br i1 [[Cond]], label %load-store-loop, label %memcpy-split
}
define i8* @memset_caller(i8* %dst, i32 %c, i64 %n) #0 {