[PATCH] D152317: [NVPTX] Allow using v4i32 for memcpy lowering.
Artem Belevich via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 7 13:22:20 PDT 2023
tra updated this revision to Diff 529416.
tra added a comment.
Removed the unnecessary lowering customization -- it is already set for all fixed
vector types.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D152317/new/
https://reviews.llvm.org/D152317
Files:
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
llvm/lib/Target/NVPTX/NVPTXISelLowering.h
llvm/test/CodeGen/NVPTX/intrinsics.ll
Index: llvm/test/CodeGen/NVPTX/intrinsics.ll
===================================================================
--- llvm/test/CodeGen/NVPTX/intrinsics.ll
+++ llvm/test/CodeGen/NVPTX/intrinsics.ll
@@ -133,6 +133,46 @@
ret i64 %ret
}
+%struct.S = type { [4 x i64] }
+
+; CHECK-LABEL: test_memcpy
+define dso_local void @test_memcpy(ptr noundef %dst, ptr noundef %src) #0 {
+; CHECK-DAG: ld.param.u{{32|64}} %[[D:(r|rd)[0-9]+]], [test_memcpy_param_0];
+; CHECK-DAG: ld.param.u{{32|64}} %[[S:(r|rd)[0-9]+]], [test_memcpy_param_1];
+; CHECK-DAG: ld.u8 %[[V30:rs[0-9]+]], [%[[S]]+30];
+; CHECK-DAG: st.u8 [%[[D]]+30], %[[V30]];
+; CHECK-DAG: ld.u16 %[[V28:rs[0-9]+]], [%[[S]]+28];
+; CHECK-DAG: st.u16 [%[[D]]+28], %[[V28]];
+; CHECK-DAG: ld.u32 %[[V24:r[0-9]+]], [%[[S]]+24];
+; CHECK-DAG: st.u32 [%[[D]]+24], %[[V24]];
+; CHECK-DAG: ld.u64 %[[V16:rd[0-9]+]], [%[[S]]+16];
+; CHECK-DAG: st.u64 [%[[D]]+16], %[[V16]];
+; CHECK-DAG: ld.v4.u32 {[[V0:%r[0-9]+, %r[0-9]+, %r[0-9]+, %r[0-9]+]]}, [%[[S]]];
+; CHECK-DAG: st.v4.u32 [%[[D]]], {[[V0]]};
+ call void @llvm.memcpy.p0.p0.i64(ptr align 16 %dst, ptr align 16 %src, i64 31, i1 false)
+ ret void
+}
+
+; CHECK-LABEL: test_memcpy_a8
+define dso_local void @test_memcpy_a8(ptr noundef %dst, ptr noundef %src) #0 {
+; CHECK-DAG: ld.param.u{{32|64}} %[[D:(r|rd)[0-9]+]], [test_memcpy_a8_param_0];
+; CHECK-DAG: ld.param.u{{32|64}} %[[S:(r|rd)[0-9]+]], [test_memcpy_a8_param_1];
+; CHECK-DAG: ld.u8 %[[V30:rs[0-9]+]], [%[[S]]+30];
+; CHECK-DAG: st.u8 [%[[D]]+30], %[[V30]];
+; CHECK-DAG: ld.u16 %[[V28:rs[0-9]+]], [%[[S]]+28];
+; CHECK-DAG: st.u16 [%[[D]]+28], %[[V28]];
+; CHECK-DAG: ld.u32 %[[V24:r[0-9]+]], [%[[S]]+24];
+; CHECK-DAG: st.u32 [%[[D]]+24], %[[V24]];
+; CHECK-DAG: ld.u64 %[[V16:rd[0-9]+]], [%[[S]]+16];
+; CHECK-DAG: st.u64 [%[[D]]+16], %[[V16]];
+; CHECK-DAG: ld.u64 %[[V8:rd[0-9]+]], [%[[S]]+8];
+; CHECK-DAG: st.u64 [%[[D]]+8], %[[V8]];
+; CHECK-DAG: ld.u64 %[[V0:rd[0-9]+]], [%[[S]]];
+; CHECK-DAG: st.u64 [%[[D]]], %[[V0]];
+ call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %src, i64 31, i1 false)
+ ret void
+}
+
declare float @llvm.fabs.f32(float)
declare double @llvm.fabs.f64(double)
declare float @llvm.nvvm.sqrt.f(float)
@@ -142,6 +182,7 @@
declare i16 @llvm.ctpop.i16(i16)
declare i32 @llvm.ctpop.i32(i32)
declare i64 @llvm.ctpop.i64(i64)
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.clock()
Index: llvm/lib/Target/NVPTX/NVPTXISelLowering.h
===================================================================
--- llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -572,6 +572,14 @@
// instruction, so we say that ctlz is cheap to speculate.
bool isCheapToSpeculateCtlz(Type *Ty) const override { return true; }
+ EVT getOptimalMemOpType(const MemOp &Op,
+ const AttributeList &FuncAttributes) const override;
+
+ EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override {
+ if (VT == MVT::v4i32)
+ return VT;
+ return TargetLoweringBase::getTypeToTransformTo(Context, VT);
+ }
AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override {
return AtomicExpansionKind::None;
}
Index: llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
===================================================================
--- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -5454,6 +5454,16 @@
return AtomicExpansionKind::CmpXChg;
}
+EVT NVPTXTargetLowering::getOptimalMemOpType(
+ const MemOp &Op, const AttributeList &FuncAttributes) const {
+
+ if (Op.size() >= 16 && Op.isDstAligned(Align(16)))
+ return MVT::v4i32;
+
+ // Use the default.
+ return MVT::Other;
+}
+
// Pin NVPTXTargetObjectFile's vtables to this file.
NVPTXTargetObjectFile::~NVPTXTargetObjectFile() = default;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D152317.529416.patch
Type: text/x-patch
Size: 4235 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230607/122df665/attachment.bin>
More information about the llvm-commits
mailing list