[llvm] 6d2e5c3 - LowerMemIntrinsics: Skip memmove with different address spaces
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sat Jun 10 09:28:11 PDT 2023
Author: Matt Arsenault
Date: 2023-06-10T12:28:05-04:00
New Revision: 6d2e5c3445c1b9509e1856601913b0a01bfb2864
URL: https://github.com/llvm/llvm-project/commit/6d2e5c3445c1b9509e1856601913b0a01bfb2864
DIFF: https://github.com/llvm/llvm-project/commit/6d2e5c3445c1b9509e1856601913b0a01bfb2864.diff
LOG: LowerMemIntrinsics: Skip memmove with different address spaces
This is a quick fix for an assert when the source and destination have
different address spaces. The pointer compare needs to have matching
types, but we can't generically introduce an addrspacecast, and we
don't know whether the address spaces alias.
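For context, a minimal IR reproducer in the spirit of the tests added
below; the function name @repro is a placeholder, and the AMDGPU
global/private address spaces stand in for any two mismatched address
spaces:

  declare void @llvm.memmove.p1.p5.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(5) nocapture readonly, i64, i1 immarg)

  define void @repro(ptr addrspace(1) %dst, ptr addrspace(5) %src) {
    ; Loop expansion must compare src and dst to pick a copy direction,
    ; but the pointer types differ here, so the expansion now returns
    ; false (leaving the intrinsic in place) instead of asserting.
    call void @llvm.memmove.p1.p5.i64(ptr addrspace(1) %dst, ptr addrspace(5) %src, i64 256, i1 false)
    ret void
  }

Callers of expandMemMoveAsLoop (e.g. PreISelIntrinsicLowering below)
now check the returned bool and only erase the intrinsic when the
expansion succeeded.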
Added:
Modified:
llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h b/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
index d0c27d198ae43..4bf4df02b0be3 100644
--- a/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
+++ b/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
@@ -50,8 +50,9 @@ void createMemCpyLoopKnownSize(
void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI,
ScalarEvolution *SE = nullptr);
-/// Expand \p MemMove as a loop. \p MemMove is not deleted.
-void expandMemMoveAsLoop(MemMoveInst *MemMove);
+/// Expand \p MemMove as a loop. \p MemMove is not deleted. Returns true if the
+/// memmove was lowered.
+bool expandMemMoveAsLoop(MemMoveInst *MemMove);
/// Expand \p MemSet as a loop. \p MemSet is not deleted.
void expandMemSetAsLoop(MemSetInst *MemSet);
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 0bdb6b59d3ac5..0940c34ef8973 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -222,9 +222,10 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
LookupLibInfo(*ParentFunc).has(LibFunc_memmove))
break;
- expandMemMoveAsLoop(Memmove);
- Changed = true;
- Memmove->eraseFromParent();
+ if (expandMemMoveAsLoop(Memmove)) {
+ Changed = true;
+ Memmove->eraseFromParent();
+ }
}
break;
diff --git a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index 165740b552987..df15ec765bdc6 100644
--- a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -12,9 +12,12 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <optional>
+#define DEBUG_TYPE "lower-mem-intrinsics"
+
using namespace llvm;
void llvm::createMemCpyLoopKnownSize(
@@ -373,7 +376,7 @@ void llvm::createMemCpyLoopUnknownSize(
// }
// return dst;
// }
-static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
+static bool createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
Value *DstAddr, Value *CopyLen, Align SrcAlign,
Align DstAlign, bool SrcIsVolatile,
bool DstIsVolatile) {
@@ -385,10 +388,16 @@ static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
// TODO: Use different element type if possible?
IRBuilder<> CastBuilder(InsertBefore);
Type *EltTy = CastBuilder.getInt8Ty();
- Type *PtrTy =
- CastBuilder.getInt8PtrTy(SrcAddr->getType()->getPointerAddressSpace());
- SrcAddr = CastBuilder.CreateBitCast(SrcAddr, PtrTy);
- DstAddr = CastBuilder.CreateBitCast(DstAddr, PtrTy);
+
+ // FIXME: We don't know generically if it's legal to introduce an
+ // addrspacecast. We need to know either if it's legal to insert an
+ // addrspacecast, or if the address spaces cannot alias.
+ if (SrcAddr->getType()->getPointerAddressSpace() !=
+ DstAddr->getType()->getPointerAddressSpace()) {
+    LLVM_DEBUG(dbgs() << "Do not know how to expand memmove between different "
+                         "address spaces\n");
+ return false;
+ }
// Create a comparison of src and dst, based on which we jump to either
// the forward-copy part of the function (if src >= dst) or the backwards-copy
@@ -464,6 +473,7 @@ static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm);
ElseTerm->eraseFromParent();
+ return true;
}
static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
@@ -552,15 +562,16 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
}
}
-void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) {
- createMemMoveLoop(/* InsertBefore */ Memmove,
- /* SrcAddr */ Memmove->getRawSource(),
- /* DstAddr */ Memmove->getRawDest(),
- /* CopyLen */ Memmove->getLength(),
- /* SrcAlign */ Memmove->getSourceAlign().valueOrOne(),
- /* DestAlign */ Memmove->getDestAlign().valueOrOne(),
- /* SrcIsVolatile */ Memmove->isVolatile(),
- /* DstIsVolatile */ Memmove->isVolatile());
+bool llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) {
+ return createMemMoveLoop(
+ /* InsertBefore */ Memmove,
+ /* SrcAddr */ Memmove->getRawSource(),
+ /* DstAddr */ Memmove->getRawDest(),
+ /* CopyLen */ Memmove->getLength(),
+ /* SrcAlign */ Memmove->getSourceAlign().valueOrOne(),
+ /* DestAlign */ Memmove->getDestAlign().valueOrOne(),
+ /* SrcIsVolatile */ Memmove->isVolatile(),
+ /* DstIsVolatile */ Memmove->isVolatile());
}
void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
diff --git a/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll
index 78280a971c35c..c2fbe2d276afa 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll
@@ -11,6 +11,12 @@ declare void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) nocapture, ptr addrspace(3)
declare void @llvm.memmove.p1.p1.i64(ptr addrspace(1) nocapture, ptr addrspace(1) nocapture readonly, i64, i1) #1
declare void @llvm.memmove.p1.p3.i32(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) #1
declare void @llvm.memmove.p5.p5.i32(ptr addrspace(5) nocapture, ptr addrspace(5) nocapture readonly, i32, i1) #1
+declare void @llvm.memmove.p0.p1.i64(ptr nocapture writeonly, ptr addrspace(1) nocapture readonly, i64, i1 immarg) #1
+declare void @llvm.memmove.p1.p0.i64(ptr addrspace(1) nocapture writeonly, ptr nocapture readonly, i64, i1 immarg) #1
+declare void @llvm.memmove.p5.p1.i64(ptr addrspace(5) nocapture writeonly, ptr addrspace(1) nocapture readonly, i64, i1 immarg) #1
+declare void @llvm.memmove.p1.p5.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(5) nocapture readonly, i64, i1 immarg) #1
+declare void @llvm.memmove.p0.p5.i64(ptr nocapture writeonly, ptr addrspace(5) nocapture readonly, i64, i1 immarg) #1
+declare void @llvm.memmove.p5.p0.i64(ptr addrspace(5) nocapture writeonly, ptr nocapture readonly, i64, i1 immarg) #1
declare void @llvm.memset.p1.i64(ptr addrspace(1) nocapture, i8, i64, i1) #1
@@ -1307,5 +1313,59 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1(ptr addrspace(1)
ret void
}
+define amdgpu_kernel void @memmove_flat_align1_global_align1(ptr %dst, ptr addrspace(1) %src) {
+; OPT-LABEL: @memmove_flat_align1_global_align1(
+; OPT-NEXT: call void @llvm.memmove.p0.p1.i64(ptr [[DST:%.*]], ptr addrspace(1) [[SRC:%.*]], i64 256, i1 false)
+; OPT-NEXT: ret void
+;
+ call void @llvm.memmove.p0.p1.i64(ptr %dst, ptr addrspace(1) %src, i64 256, i1 false)
+ ret void
+}
+
+define amdgpu_kernel void @memmove_global_align1_flat_align1(ptr addrspace(1) %dst, ptr %src) {
+; OPT-LABEL: @memmove_global_align1_flat_align1(
+; OPT-NEXT: call void @llvm.memmove.p1.p0.i64(ptr addrspace(1) [[DST:%.*]], ptr [[SRC:%.*]], i64 256, i1 false)
+; OPT-NEXT: ret void
+;
+ call void @llvm.memmove.p1.p0.i64(ptr addrspace(1) %dst, ptr %src, i64 256, i1 false)
+ ret void
+}
+
+define amdgpu_kernel void @memmove_flat_align1_private_align1(ptr %dst, ptr addrspace(5) %src) {
+; OPT-LABEL: @memmove_flat_align1_private_align1(
+; OPT-NEXT: call void @llvm.memmove.p0.p5.i64(ptr [[DST:%.*]], ptr addrspace(5) [[SRC:%.*]], i64 256, i1 false)
+; OPT-NEXT: ret void
+;
+ call void @llvm.memmove.p0.p5.i64(ptr %dst, ptr addrspace(5) %src, i64 256, i1 false)
+ ret void
+}
+
+define amdgpu_kernel void @memmove_private_align1_flat_align1(ptr addrspace(5) %dst, ptr %src) {
+; OPT-LABEL: @memmove_private_align1_flat_align1(
+; OPT-NEXT: call void @llvm.memmove.p5.p0.i64(ptr addrspace(5) [[DST:%.*]], ptr [[SRC:%.*]], i64 256, i1 false)
+; OPT-NEXT: ret void
+;
+ call void @llvm.memmove.p5.p0.i64(ptr addrspace(5) %dst, ptr %src, i64 256, i1 false)
+ ret void
+}
+
+define amdgpu_kernel void @memmove_private_align1_global_align1(ptr addrspace(5) %dst, ptr addrspace(1) %src) {
+; OPT-LABEL: @memmove_private_align1_global_align1(
+; OPT-NEXT: call void @llvm.memmove.p5.p1.i64(ptr addrspace(5) [[DST:%.*]], ptr addrspace(1) [[SRC:%.*]], i64 256, i1 false)
+; OPT-NEXT: ret void
+;
+ call void @llvm.memmove.p5.p1.i64(ptr addrspace(5) %dst, ptr addrspace(1) %src, i64 256, i1 false)
+ ret void
+}
+
+define amdgpu_kernel void @memmove_global_align1_private_align1(ptr addrspace(1) %dst, ptr addrspace(5) %src) {
+; OPT-LABEL: @memmove_global_align1_private_align1(
+; OPT-NEXT: call void @llvm.memmove.p1.p5.i64(ptr addrspace(1) [[DST:%.*]], ptr addrspace(5) [[SRC:%.*]], i64 256, i1 false)
+; OPT-NEXT: ret void
+;
+ call void @llvm.memmove.p1.p5.i64(ptr addrspace(1) %dst, ptr addrspace(5) %src, i64 256, i1 false)
+ ret void
+}
+
attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind }