[llvm] 5c315be - [DSE] Transform memset + malloc --> calloc (PR25892)
Dawid Jurczak via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 29 09:34:39 PDT 2021
Author: Dawid Jurczak
Date: 2021-07-29T18:34:10+02:00
New Revision: 5c315bee8c9db27d12cead928eea5a3fef97f34f
URL: https://github.com/llvm/llvm-project/commit/5c315bee8c9db27d12cead928eea5a3fef97f34f
DIFF: https://github.com/llvm/llvm-project/commit/5c315bee8c9db27d12cead928eea5a3fef97f34f.diff
LOG: [DSE] Transform memset + malloc --> calloc (PR25892)
After this change, DSE can eliminate a malloc + memset pair and emit calloc instead.
This is a follow-up to https://reviews.llvm.org/D101440.
Differential Revision: https://reviews.llvm.org/D103009
Added:
Modified:
llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index d22b3f409585c..0ada5c6e72c93 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -56,6 +56,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
@@ -78,6 +79,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
@@ -505,7 +507,12 @@ memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
BasicBlock::iterator SecondBBI(SecondI);
BasicBlock *FirstBB = FirstI->getParent();
BasicBlock *SecondBB = SecondI->getParent();
- MemoryLocation MemLoc = MemoryLocation::get(SecondI);
+ MemoryLocation MemLoc;
+ if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
+ MemLoc = MemoryLocation::getForDest(MemSet);
+ else
+ MemLoc = MemoryLocation::get(SecondI);
+
auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
// Start checking the SecondBB.
@@ -819,14 +826,17 @@ bool isNoopIntrinsic(Instruction *I) {
}
// Check if we can ignore \p D for DSE.
-bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
+bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller,
+ const TargetLibraryInfo &TLI) {
Instruction *DI = D->getMemoryInst();
// Calls that only access inaccessible memory cannot read or write any memory
// locations we consider for elimination.
if (auto *CB = dyn_cast<CallBase>(DI))
- if (CB->onlyAccessesInaccessibleMemory())
+ if (CB->onlyAccessesInaccessibleMemory()) {
+ if (isAllocLikeFn(DI, &TLI))
+ return false;
return true;
-
+ }
// We can eliminate stores to locations not visible to the caller across
// throwing instructions.
if (DI->mayThrow() && !DefVisibleToCaller)
@@ -841,7 +851,7 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
return true;
// Skip intrinsics that do not really read or modify memory.
- if (isNoopIntrinsic(D->getMemoryInst()))
+ if (isNoopIntrinsic(DI))
return true;
return false;
@@ -1389,7 +1399,7 @@ struct DSEState {
MemoryDef *CurrentDef = cast<MemoryDef>(Current);
Instruction *CurrentI = CurrentDef->getMemoryInst();
- if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(DefUO)))
+ if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(DefUO), TLI))
continue;
// Before we try to remove anything, check for any extra throwing
@@ -1816,13 +1826,58 @@ struct DSEState {
if (StoredConstant && StoredConstant->isNullValue()) {
auto *DefUOInst = dyn_cast<Instruction>(DefUO);
- if (DefUOInst && isCallocLikeFn(DefUOInst, &TLI)) {
- auto *UnderlyingDef = cast<MemoryDef>(MSSA.getMemoryAccess(DefUOInst));
- // If UnderlyingDef is the clobbering access of Def, no instructions
- // between them can modify the memory location.
- auto *ClobberDef =
- MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def);
- return UnderlyingDef == ClobberDef;
+ if (DefUOInst) {
+ if (isCallocLikeFn(DefUOInst, &TLI)) {
+ auto *UnderlyingDef =
+ cast<MemoryDef>(MSSA.getMemoryAccess(DefUOInst));
+ // If UnderlyingDef is the clobbering access of Def, no instructions
+ // between them can modify the memory location.
+ auto *ClobberDef =
+ MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def);
+ return UnderlyingDef == ClobberDef;
+ }
+
+ if (MemSet) {
+ if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
+ F.hasFnAttribute(Attribute::SanitizeAddress) ||
+ F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
+ F.getName() == "calloc")
+ return false;
+ auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUOInst));
+ if (!Malloc)
+ return false;
+ auto *InnerCallee = Malloc->getCalledFunction();
+ if (!InnerCallee)
+ return false;
+ LibFunc Func;
+ if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
+ Func != LibFunc_malloc)
+ return false;
+ if (Malloc->getOperand(0) == MemSet->getLength()) {
+ if (DT.dominates(Malloc, MemSet) &&
+ memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT)) {
+ IRBuilder<> IRB(Malloc);
+ const auto &DL = Malloc->getModule()->getDataLayout();
+ AttributeList EmptyList;
+ if (auto *Calloc = emitCalloc(
+ ConstantInt::get(IRB.getIntPtrTy(DL), 1),
+ Malloc->getArgOperand(0), EmptyList, IRB, TLI)) {
+ MemorySSAUpdater Updater(&MSSA);
+ auto *LastDef = cast<MemoryDef>(
+ Updater.getMemorySSA()->getMemoryAccess(Malloc));
+ auto *NewAccess = Updater.createMemoryAccessAfter(
+ cast<Instruction>(Calloc), LastDef, LastDef);
+ auto *NewAccessMD = cast<MemoryDef>(NewAccess);
+ Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
+ Updater.removeMemoryAccess(Malloc);
+ Malloc->replaceAllUsesWith(Calloc);
+ Malloc->eraseFromParent();
+ return true;
+ }
+ return false;
+ }
+ }
+ }
}
}
diff --git a/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll b/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
index 184653982a6a1..12534b6047c57 100644
--- a/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
@@ -1,9 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -basic-aa -dse -S | FileCheck %s
-; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s
+; RUN: opt < %s -aa-pipeline=basic-aa -passes='dse,verify<memoryssa>' -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
-declare i8* @calloc(i64, i64)
declare void @memset_pattern16(i8*, i8*, i64)
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
@@ -309,6 +308,156 @@ entry:
ret void
}
+declare noalias i8* @malloc(i64)
+declare noalias i8* @_Znwm(i64)
+declare void @clobber_memory(float*)
+
+; based on pr25892_lite
+define i8* @zero_memset_after_malloc(i64 %size) {
+; CHECK-LABEL: @zero_memset_after_malloc(
+; CHECK-NEXT: [[CALL:%.*]] = call i8* @calloc(i64 1, i64 [[SIZE:%.*]])
+; CHECK-NEXT: ret i8* [[CALL]]
+;
+ %call = call i8* @malloc(i64 %size) inaccessiblememonly
+ call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size, i1 false)
+ ret i8* %call
+}
+
+; based on pr25892_lite
+define i8* @zero_memset_after_malloc_with_intermediate_clobbering(i64 %size) {
+; CHECK-LABEL: @zero_memset_after_malloc_with_intermediate_clobbering(
+; CHECK-NEXT: [[CALL:%.*]] = call i8* @malloc(i64 [[SIZE:%.*]])
+; CHECK-NEXT: [[BC:%.*]] = bitcast i8* [[CALL]] to float*
+; CHECK-NEXT: call void @clobber_memory(float* [[BC]])
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 0, i64 [[SIZE]], i1 false)
+; CHECK-NEXT: ret i8* [[CALL]]
+;
+ %call = call i8* @malloc(i64 %size) inaccessiblememonly
+ %bc = bitcast i8* %call to float*
+ call void @clobber_memory(float* %bc)
+ call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size, i1 false)
+ ret i8* %call
+}
+
+; based on pr25892_lite
+define i8* @zero_memset_after_malloc_with_different_sizes(i64 %size) {
+; CHECK-LABEL: @zero_memset_after_malloc_with_different_sizes(
+; CHECK-NEXT: [[CALL:%.*]] = call i8* @malloc(i64 [[SIZE:%.*]])
+; CHECK-NEXT: [[SIZE2:%.*]] = add nsw i64 [[SIZE]], -1
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 0, i64 [[SIZE2]], i1 false)
+; CHECK-NEXT: ret i8* [[CALL]]
+;
+ %call = call i8* @malloc(i64 %size) inaccessiblememonly
+ %size2 = add nsw i64 %size, -1
+ call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size2, i1 false)
+ ret i8* %call
+}
+
+; based on pr25892_lite
+define i8* @zero_memset_after_new(i64 %size) {
+; CHECK-LABEL: @zero_memset_after_new(
+; CHECK-NEXT: [[CALL:%.*]] = call i8* @_Znwm(i64 [[SIZE:%.*]])
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 0, i64 [[SIZE]], i1 false)
+; CHECK-NEXT: ret i8* [[CALL]]
+;
+ %call = call i8* @_Znwm(i64 %size)
+ call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size, i1 false)
+ ret i8* %call
+}
+
+; This should not create a calloc and should not crash the compiler.
+define i8* @notmalloc_memset(i64 %size, i8*(i64)* %notmalloc) {
+; CHECK-LABEL: @notmalloc_memset(
+; CHECK-NEXT: [[CALL1:%.*]] = call i8* [[NOTMALLOC:%.*]](i64 [[SIZE:%.*]])
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[CALL1]], i8 0, i64 [[SIZE]], i1 false)
+; CHECK-NEXT: ret i8* [[CALL1]]
+;
+ %call1 = call i8* %notmalloc(i64 %size)
+ call void @llvm.memset.p0i8.i64(i8* %call1, i8 0, i64 %size, i1 false)
+ ret i8* %call1
+}
+
+; This should not create recursive call to calloc.
+define i8* @calloc(i64 %nmemb, i64 %size) {
+; CHECK-LABEL: @calloc(
+; CHECK: entry:
+; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[SIZE:%.*]], [[NMEMB:%.*]]
+; CHECK-NEXT: [[CALL:%.*]] = tail call noalias align 16 i8* @malloc(i64 [[MUL]])
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8* [[CALL]], null
+; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: tail call void @llvm.memset.p0i8.i64(i8* nonnull align 16 [[CALL]], i8 0, i64 [[MUL]], i1 false)
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: ret i8* [[CALL]]
+;
+entry:
+ %mul = mul i64 %size, %nmemb
+ %call = tail call noalias align 16 i8* @malloc(i64 %mul)
+ %tobool.not = icmp eq i8* %call, null
+ br i1 %tobool.not, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %call, i8 0, i64 %mul, i1 false)
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret i8* %call
+}
+
+define float* @pr25892(i64 %size) {
+; CHECK-LABEL: @pr25892(
+; CHECK: entry:
+; CHECK-NEXT: [[CALL:%.*]] = call i8* @calloc(i64 1, i64 [[SIZE:%.*]])
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[CALL]], null
+; CHECK-NEXT: br i1 [[CMP]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
+; CHECK: if.end:
+; CHECK-NEXT: [[BC:%.*]] = bitcast i8* [[CALL]] to float*
+; CHECK-NEXT: br label [[CLEANUP]]
+; CHECK: cleanup:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi float* [ [[BC]], [[IF_END]] ], [ null, [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret float* [[RETVAL_0]]
+;
+entry:
+ %call = call i8* @malloc(i64 %size) inaccessiblememonly
+ %cmp = icmp eq i8* %call, null
+ br i1 %cmp, label %cleanup, label %if.end
+if.end:
+ %bc = bitcast i8* %call to float*
+ call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size, i1 false)
+ br label %cleanup
+cleanup:
+ %retval.0 = phi float* [ %bc, %if.end ], [ null, %entry ]
+ ret float* %retval.0
+}
+
+define float* @pr25892_with_extra_store(i64 %size) {
+; CHECK-LABEL: @pr25892_with_extra_store(
+; CHECK: entry:
+; CHECK-NEXT: [[CALL:%.*]] = call i8* @calloc(i64 1, i64 [[SIZE:%.*]])
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[CALL]], null
+; CHECK-NEXT: br i1 [[CMP]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
+; CHECK: if.end:
+; CHECK-NEXT: [[BC:%.*]] = bitcast i8* [[CALL]] to float*
+; CHECK-NEXT: br label [[CLEANUP]]
+; CHECK: cleanup:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi float* [ [[BC]], [[IF_END]] ], [ null, [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret float* [[RETVAL_0]]
+;
+entry:
+ %call = call i8* @malloc(i64 %size) inaccessiblememonly
+ %cmp = icmp eq i8* %call, null
+ br i1 %cmp, label %cleanup, label %if.end
+if.end:
+ %bc = bitcast i8* %call to float*
+ call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size, i1 false)
+ store i8 0, i8* %call, align 1
+ br label %cleanup
+cleanup:
+ %retval.0 = phi float* [ %bc, %if.end ], [ null, %entry ]
+ ret float* %retval.0
+}
+
; PR50143
define i8* @store_zero_after_calloc_inaccessiblememonly() {
; CHECK-LABEL: @store_zero_after_calloc_inaccessiblememonly(
More information about the llvm-commits
mailing list