[PATCH] D98606: [NVPTX] CUDA does provide malloc/free since compute capability 2.X
Johannes Doerfert via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 15 20:46:12 PDT 2021
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGf40a2c3befc8: [NVPTX] CUDA does provide malloc/free since compute capability 2.X (authored by jdoerfert).
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D98606/new/
https://reviews.llvm.org/D98606
Files:
llvm/lib/Analysis/TargetLibraryInfo.cpp
llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll
llvm/test/Transforms/InstCombine/malloc_free_delete_nvptx.ll
Index: llvm/test/Transforms/InstCombine/malloc_free_delete_nvptx.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/malloc_free_delete_nvptx.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target triple = "nvptx64"
+
+declare void @user(i8*)
+declare i8* @malloc(i64)
+declare void @free(i8*)
+
+; Ensure the nvptx backend states malloc & free are a thing so we can recognize
+; and optimize them properly. In the test below the malloc-free chain is
+; useless and we can remove it *if* we know about malloc & free.
+define void @malloc_then_free_not_needed() {
+; CHECK-LABEL: @malloc_then_free_not_needed(
+; CHECK-NEXT: ret void
+;
+ %a = call i8* @malloc(i64 4)
+ store i8 0, i8* %a
+ call void @free(i8* %a)
+ ret void
+}
+
+define void @malloc_then_free_needed() {
+; CHECK-LABEL: @malloc_then_free_needed(
+; CHECK-NEXT: [[A:%.*]] = call dereferenceable_or_null(4) i8* @malloc(i64 4)
+; CHECK-NEXT: call void @user(i8* [[A]])
+; CHECK-NEXT: call void @free(i8* [[A]])
+; CHECK-NEXT: ret void
+;
+ %a = call i8* @malloc(i64 4)
+ call void @user(i8* %a)
+ call void @free(i8* %a)
+ ret void
+}
Index: llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll
===================================================================
--- llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll
+++ llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll
@@ -1,6 +1,9 @@
; RUN: llc < %s -march=nvptx 2>&1 | FileCheck %s
; Allow to make libcalls that are defined in the current module
+declare i8* @malloc(i64)
+declare void @free(i8*)
+
; Underlying libcall declaration
; CHECK: .visible .func (.param .align 16 .b8 func_retval0[16]) __umodti3
@@ -29,3 +32,14 @@
define i128 @__umodti3(i128, i128) {
ret i128 0
}
+
+define void @malloc_then_free() {
+; CHECK: call.uni (retval0),
+; CHECK: malloc,
+; CHECK: call.uni
+; CHECK: free,
+ %a = call i8* @malloc(i64 4)
+ store i8 0, i8* %a
+ call void @free(i8* %a)
+ ret void
+}
Index: llvm/lib/Analysis/TargetLibraryInfo.cpp
===================================================================
--- llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -547,6 +547,17 @@
if (T.isNVPTX()) {
TLI.disableAllFunctions();
TLI.setAvailable(LibFunc_nvvm_reflect);
+ TLI.setAvailable(llvm::LibFunc_malloc);
+ TLI.setAvailable(llvm::LibFunc_free);
+
+ // TODO: We could enable the following two according to [0] but we haven't
+ // done an evaluation wrt. the performance implications.
+ // [0]
+ // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations
+ //
+ // TLI.setAvailable(llvm::LibFunc_memcpy);
+ // TLI.setAvailable(llvm::LibFunc_memset);
+
} else {
TLI.setUnavailable(LibFunc_nvvm_reflect);
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D98606.330872.patch
Type: text/x-patch
Size: 2978 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210316/2e7982af/attachment.bin>
More information about the llvm-commits
mailing list