[PATCH] D98606: [NVPTX] CUDA does provide malloc/free since compute capability 2.X

Mon Mar 15 14:00:12 PDT 2021

jdoerfert updated this revision to Diff 330801.
jdoerfert added a comment.
Herald added a subscriber: jholewinski.

Add tests; merge D98607 <https://reviews.llvm.org/D98607> into this as a comment.
TBH, this patch doesn't affect the CodeGen test, only the InstCombine test.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98606/new/

https://reviews.llvm.org/D98606

Files:
  llvm/lib/Analysis/TargetLibraryInfo.cpp
  llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll
  llvm/test/Transforms/InstCombine/malloc_free_delete_nvptx.ll


Index: llvm/test/Transforms/InstCombine/malloc_free_delete_nvptx.ll
===================================================================

--- /dev/null
+++ llvm/test/Transforms/InstCombine/malloc_free_delete_nvptx.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Ensure the nvptx backend states malloc & free are a thing so we can recognize 
+; and optimize them properly.
+target triple = "nvptx64"
+
+declare i8* @malloc(i64)
+declare void @free(i8*)
+
+define void @malloc_then_free() {
+; CHECK-LABEL: @malloc_then_free(
+; CHECK-NEXT:    ret void
+;
+  %a = call i8* @malloc(i64 4)
+  store i8 0, i8* %a
+  call void @free(i8* %a)
+  ret void
+}
+
Index: llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll
===================================================================
--- llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll
+++ llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll
@@ -29,3 +29,32 @@
 define i128 @__umodti3(i128, i128) {
   ret i128 0
 }
+
+declare i8* @malloc(i64)
+declare void @free(i8*)
+
+define void @malloc_then_free() {
+; CHECK:  call.uni (retval0),
+; CHECK:  malloc,
+; CHECK:  (
+; CHECK:  param0
+; CHECK:  );
+; CHECK:  ld.param.b32 %r1, [retval0+0];
+; CHECK:  } // callseq 1
+; CHECK:  mov.u16 %rs1, 0;
+; CHECK:  st.u8 [%r1], %rs1;
+; CHECK:  { // callseq 2, 0
+; CHECK:  .reg .b32 temp_param_reg;
+; CHECK:  .param .b32 param0;
+; CHECK:  st.param.b32 [param0+0], %r1;
+; CHECK:  call.uni
+; CHECK:  free,
+; CHECK:  (
+; CHECK:  param0
+; CHECK:  );
+; CHECK:  } // callseq 2
+  %a = call i8* @malloc(i64 4)
+  store i8 0, i8* %a
+  call void @free(i8* %a)
+  ret void
+}
Index: llvm/lib/Analysis/TargetLibraryInfo.cpp
===================================================================
--- llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -547,6 +547,17 @@
   if (T.isNVPTX()) {
     TLI.disableAllFunctions();
     TLI.setAvailable(LibFunc_nvvm_reflect);
+    TLI.setAvailable(llvm::LibFunc_malloc);
+    TLI.setAvailable(llvm::LibFunc_free);
+
+    // TODO: We could enable the following two according to [0] but we haven't
+    //       done an evaluation wrt. the performance implications.
+    // [0]
+    // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations
+    //
+    //    TLI.setAvailable(llvm::LibFunc_memcpy);
+    //    TLI.setAvailable(llvm::LibFunc_memset);
+
   } else {
     TLI.setUnavailable(LibFunc_nvvm_reflect);
   }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D98606.330801.patch
Type: text/x-patch
Size: 2565 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210315/bf02d3d8/attachment.bin>