[llvm] [NVPTX] Allow MemTransferInst in adjustByValArgAlignment (PR #112462)

Jinsong Ji via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 16 11:45:02 PDT 2024


https://github.com/jsji updated https://github.com/llvm/llvm-project/pull/112462

>From 11e103b87985bd8b905cab26503414f9cf693b95 Mon Sep 17 00:00:00 2001
From: Jinsong Ji <jinsong.ji at intel.com>
Date: Tue, 15 Oct 2024 18:54:05 -0700
Subject: [PATCH 1/2] [NVPTX] Allow MemTransferInst in adjustByValArgAlignment

Before b7b28e770c46, AreSupportedUsers skipped
MemTransferInst.
b7b28e770c46 started to allow MemTransferInst, so
we should allow it in adjustByValArgAlignment too.
---
 llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp    |  3 +++
 llvm/test/CodeGen/NVPTX/lower-byval-args.ll | 14 ++++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index 3041c16c7a7604..bb76cfd6fdb7bd 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -435,6 +435,9 @@ static void adjustByValArgAlignment(Argument *Arg, Value *ArgInParamAS,
         continue;
       }
 
+      if (isa<MemTransferInst>(CurUser))
+        continue;
+
       // supported for grid_constant
       if (IsGridConstant &&
           (isa<CallInst>(CurUser) || isa<StoreInst>(CurUser) ||
diff --git a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
index a7dbc4c1620a5f..e21bef42ed4e7f 100644
--- a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
@@ -219,6 +219,20 @@ entry:
   ret void
 }
 
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
+define dso_local void @memcpy_from_param_noalign (ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) %s) local_unnamed_addr #0 {
+; COMMON-LABEL: define dso_local void @memcpy_from_param_noalign(
+; COMMON-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; COMMON-NEXT:  [[ENTRY:.*:]]
+; COMMON-NEXT:    [[S1:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
+; COMMON-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr [[OUT]], ptr addrspace(101) [[S1]], i64 16, i1 true)
+; COMMON-NEXT:    ret void
+;
+entry:
+  tail call void @llvm.memcpy.p0.p0.i64(ptr %out, ptr %s, i64 16, i1 true)
+  ret void
+}
+
 ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
 define dso_local void @memcpy_to_param(ptr nocapture noundef readonly %in, ptr nocapture noundef readnone byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
 ; COMMON-LABEL: define dso_local void @memcpy_to_param(

>From 8ab44ebcbff70456c8ceab5b648b84ac3ff6cf2f Mon Sep 17 00:00:00 2001
From: Jinsong Ji <jinsong.ji at intel.com>
Date: Wed, 16 Oct 2024 11:38:14 -0700
Subject: [PATCH 2/2] mark newfunc as kernel

---
 ...ransferInst-in-adjustByValArgAlignme.patch | 58 +++++++++++++++++++
 llvm/test/CodeGen/NVPTX/lower-byval-args.ll   |  3 +-
 2 files changed, 60 insertions(+), 1 deletion(-)
 create mode 100644 llvm/0001-NVPTX-Allow-MemTransferInst-in-adjustByValArgAlignme.patch

diff --git a/llvm/0001-NVPTX-Allow-MemTransferInst-in-adjustByValArgAlignme.patch b/llvm/0001-NVPTX-Allow-MemTransferInst-in-adjustByValArgAlignme.patch
new file mode 100644
index 00000000000000..1bb90d94c378ff
--- /dev/null
+++ b/llvm/0001-NVPTX-Allow-MemTransferInst-in-adjustByValArgAlignme.patch
@@ -0,0 +1,58 @@
+From 6ca907d995bf39845af56b774c52613bbe125a5e Mon Sep 17 00:00:00 2001
+From: Jinsong Ji <jinsong.ji at intel.com>
+Date: Tue, 15 Oct 2024 18:54:05 -0700
+Subject: [PATCH] [NVPTX] Allow MemTransferInst in adjustByValArgAlignment
+
+Before b7b28e770c46, AreSupportedUsers skipped
+MemTransferInst.
+b7b28e770c46 started to allow MemTransferInst, so
+we should allow it in adjustByValArgAlignment too.
+---
+ llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp    |  3 +++
+ llvm/test/CodeGen/NVPTX/lower-byval-args.ll | 14 ++++++++++++++
+ 3 files changed, 17 insertions(+)
+ create mode 100644 llvm/t
+
+diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+index 3041c16c7a76..bb76cfd6fdb7 100644
+--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
++++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+@@ -435,6 +435,9 @@ static void adjustByValArgAlignment(Argument *Arg, Value *ArgInParamAS,
+         continue;
+       }
+ 
++      if (isa<MemTransferInst>(CurUser))
++        continue;
++
+       // supported for grid_constant
+       if (IsGridConstant &&
+           (isa<CallInst>(CurUser) || isa<StoreInst>(CurUser) ||
+index 000000000000..e69de29bb2d1
+diff --git a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
+index a7dbc4c1620a..e21bef42ed4e 100644
+--- a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
++++ b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
+@@ -219,6 +219,20 @@ entry:
+   ret void
+ }
+ 
++; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
++define dso_local void @memcpy_from_param_noalign (ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) %s) local_unnamed_addr #0 {
++; COMMON-LABEL: define dso_local void @memcpy_from_param_noalign(
++; COMMON-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
++; COMMON-NEXT:  [[ENTRY:.*:]]
++; COMMON-NEXT:    [[S1:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
++; COMMON-NEXT:    call void @llvm.memcpy.p0.p101.i64(ptr [[OUT]], ptr addrspace(101) [[S1]], i64 16, i1 true)
++; COMMON-NEXT:    ret void
++;
++entry:
++  tail call void @llvm.memcpy.p0.p0.i64(ptr %out, ptr %s, i64 16, i1 true)
++  ret void
++}
++
+ ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
+ define dso_local void @memcpy_to_param(ptr nocapture noundef readonly %in, ptr nocapture noundef readnone byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
+ ; COMMON-LABEL: define dso_local void @memcpy_to_param(
+-- 
+2.39.1
+
diff --git a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
index e21bef42ed4e7f..f78fecbac89143 100644
--- a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
@@ -440,7 +440,7 @@ attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: readwrite
 attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: write) }
 
 !llvm.module.flags = !{!0, !1, !2, !3}
-!nvvm.annotations = !{!4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19}
+!nvvm.annotations = !{!4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !23}
 !llvm.ident = !{!20, !21}
 
 !0 = !{i32 2, !"SDK Version", [2 x i32] [i32 11, i32 8]}
@@ -465,3 +465,4 @@ attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: write) }
 !19 = !{ptr @test_select_write, !"kernel", i32 1}
 !20 = !{!"clang version 20.0.0git"}
 !21 = !{!"clang version 3.8.0 (tags/RELEASE_380/final)"}
+!23 = !{ptr @memcpy_from_param_noalign, !"kernel", i32 1}



More information about the llvm-commits mailing list