[llvm] r355672 - [SelectionDAG] Allow the user to specify a memeq function.
Clement Courbet via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 8 01:07:45 PST 2019
Author: courbet
Date: Fri Mar 8 01:07:45 2019
New Revision: 355672
URL: http://llvm.org/viewvc/llvm-project?rev=355672&view=rev
Log:
[SelectionDAG] Allow the user to specify a memeq function.
Summary:
Right now, when we encounter a string equality check,
e.g. `if (memcmp(a, b, s) == 0)`, we try to expand to a comparison if `s` is a
small compile-time constant, and fall back to calling `memcmp()` otherwise.
This is sub-optimal because memcmp has to compute much more than
equality.
This patch replaces `memcmp(a, b, s) == 0` with `bcmp(a, b, s) == 0` on platforms
that support `bcmp`.
`bcmp` can be made much more efficient than `memcmp` because equality
compare is trivially parallel while lexicographic ordering has a chain
dependency.
Subscribers: fedor.sergeev, jyknight, ckennelly, gchatelet, llvm-commits
Differential Revision: https://reviews.llvm.org/D56593
Modified:
llvm/trunk/docs/ReleaseNotes.rst
llvm/trunk/include/llvm/Transforms/Utils/BuildLibCalls.h
llvm/trunk/lib/Analysis/TargetLibraryInfo.cpp
llvm/trunk/lib/Transforms/Utils/BuildLibCalls.cpp
llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp
llvm/trunk/test/CodeGen/X86/memcmp.ll
llvm/trunk/test/Transforms/InferFunctionAttrs/annotate.ll
llvm/trunk/test/Transforms/InstCombine/memcmp-1.ll
llvm/trunk/test/Transforms/InstCombine/strcmp-1.ll
Modified: llvm/trunk/docs/ReleaseNotes.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/ReleaseNotes.rst?rev=355672&r1=355671&r2=355672&view=diff
==============================================================================
--- llvm/trunk/docs/ReleaseNotes.rst (original)
+++ llvm/trunk/docs/ReleaseNotes.rst Fri Mar 8 01:07:45 2019
@@ -40,6 +40,11 @@ Non-comprehensive list of changes in thi
functionality, or simply have a lot to talk about), see the `NOTE` below
for adding a new subsection.
+* The optimizer will now convert calls to memcmp into calls to bcmp in some
+ circumstances. Users who are building freestanding code (not depending on the
+ platform's libc) without specifying -ffreestanding may need to either pass
+ -fno-builtin-bcmp, or provide a bcmp function.
+
.. NOTE
If you would like to document a larger change, then you can add a
subsection about it right here. You can copy the following boilerplate
Modified: llvm/trunk/include/llvm/Transforms/Utils/BuildLibCalls.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils/BuildLibCalls.h?rev=355672&r1=355671&r2=355672&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/BuildLibCalls.h (original)
+++ llvm/trunk/include/llvm/Transforms/Utils/BuildLibCalls.h Fri Mar 8 01:07:45 2019
@@ -92,6 +92,10 @@ namespace llvm {
Value *emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
const DataLayout &DL, const TargetLibraryInfo *TLI);
+ /// Emit a call to the bcmp function.
+ Value *emitBCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI);
+
/// Emit a call to the unary function named 'Name' (e.g. 'floor'). This
/// function is known to take a single of type matching 'Op' and returns one
/// value with the same type. If 'Op' is a long double, 'l' is added as the
Modified: llvm/trunk/lib/Analysis/TargetLibraryInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/TargetLibraryInfo.cpp?rev=355672&r1=355671&r2=355672&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/TargetLibraryInfo.cpp (original)
+++ llvm/trunk/lib/Analysis/TargetLibraryInfo.cpp Fri Mar 8 01:07:45 2019
@@ -49,6 +49,16 @@ static bool hasSinCosPiStret(const Tripl
return true;
}
+static bool hasBcmp(const Triple &TT) {
+ // POSIX removed support for bcmp() in 2001, but glibc and several other
+ // libc implementations still have it.
+ if (TT.isOSLinux())
+ return TT.isGNUEnvironment() || TT.isMusl();
+ // Both NetBSD and OpenBSD are planning to remove the function. Windows does
+ // not have it.
+ return TT.isOSFreeBSD() || TT.isOSSolaris() || TT.isOSDarwin();
+}
+
/// Initialize the set of available library functions based on the specified
/// target triple. This should be carefully written so that a missing target
/// triple gets a sane set of defaults.
@@ -141,6 +151,9 @@ static void initialize(TargetLibraryInfo
TLI.setUnavailable(LibFunc_sincospif_stret);
}
+ if (!hasBcmp(T))
+ TLI.setUnavailable(LibFunc_bcmp);
+
if (T.isMacOSX() && T.getArch() == Triple::x86 &&
!T.isMacOSXVersionLT(10, 7)) {
// x86-32 OSX has a scheme where fwrite and fputs (and some other functions
Modified: llvm/trunk/lib/Transforms/Utils/BuildLibCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/BuildLibCalls.cpp?rev=355672&r1=355671&r2=355672&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/BuildLibCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/BuildLibCalls.cpp Fri Mar 8 01:07:45 2019
@@ -930,28 +930,41 @@ Value *llvm::emitMemChr(Value *Ptr, Valu
return CI;
}
-Value *llvm::emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
- const DataLayout &DL, const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_memcmp))
+// Common code for memcmp() and bcmp(), which have the exact same properties,
+// just a slight difference in semantics.
+static Value *emitMemCmpOrBcmp(llvm::LibFunc libfunc, Value *Ptr1, Value *Ptr2,
+ Value *Len, IRBuilder<> &B, const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(libfunc))
return nullptr;
Module *M = B.GetInsertBlock()->getModule();
- StringRef MemCmpName = TLI->getName(LibFunc_memcmp);
+ StringRef CmpFnName = TLI->getName(libfunc);
LLVMContext &Context = B.GetInsertBlock()->getContext();
- FunctionCallee MemCmp =
- M->getOrInsertFunction(MemCmpName, B.getInt32Ty(), B.getInt8PtrTy(),
+ FunctionCallee CmpFn =
+ M->getOrInsertFunction(CmpFnName, B.getInt32Ty(), B.getInt8PtrTy(),
B.getInt8PtrTy(), DL.getIntPtrType(Context));
- inferLibFuncAttributes(M, MemCmpName, *TLI);
+ inferLibFuncAttributes(M, CmpFnName, *TLI);
CallInst *CI = B.CreateCall(
- MemCmp, {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, MemCmpName);
+ CmpFn, {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, CmpFnName);
if (const Function *F =
- dyn_cast<Function>(MemCmp.getCallee()->stripPointerCasts()))
+ dyn_cast<Function>(CmpFn.getCallee()->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return CI;
}
+Value *llvm::emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
+ return emitMemCmpOrBcmp(LibFunc_memcmp, Ptr1, Ptr2, Len, B, DL, TLI);
+}
+
+Value *llvm::emitBCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
+ return emitMemCmpOrBcmp(LibFunc_bcmp, Ptr1, Ptr2, Len, B, DL, TLI);
+}
+
/// Append a suffix to the function name according to the type of 'Op'.
static void appendTypeSuffix(Value *Op, StringRef &Name,
SmallString<20> &NameBuffer) {
Modified: llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp?rev=355672&r1=355671&r2=355672&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp Fri Mar 8 01:07:45 2019
@@ -831,18 +831,9 @@ Value *LibCallSimplifier::optimizeMemChr
return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "memchr");
}
-Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
- Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
-
- if (LHS == RHS) // memcmp(s,s,x) -> 0
- return Constant::getNullValue(CI->getType());
-
- // Make sure we have a constant length.
- ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
- if (!LenC)
- return nullptr;
-
- uint64_t Len = LenC->getZExtValue();
+static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS,
+ uint64_t Len, IRBuilder<> &B,
+ const DataLayout &DL) {
if (Len == 0) // memcmp(s1,s2,0) -> 0
return Constant::getNullValue(CI->getType());
@@ -912,6 +903,28 @@ Value *LibCallSimplifier::optimizeMemCmp
Ret = 1;
return ConstantInt::get(CI->getType(), Ret);
}
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
+ Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
+ Value *Size = CI->getArgOperand(2);
+
+ if (LHS == RHS) // memcmp(s,s,x) -> 0
+ return Constant::getNullValue(CI->getType());
+
+ // Handle constant lengths.
+ if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size))
+ if (Value *Res = optimizeMemCmpConstantSize(CI, LHS, RHS,
+ LenC->getZExtValue(), B, DL))
+ return Res;
+
+ // memcmp(x, y, Len) == 0 -> bcmp(x, y, Len) == 0
+ // `bcmp` can be more efficient than memcmp because it only has to know that
+ // there is a difference, not where it is.
+ if (isOnlyUsedInZeroEqualityComparison(CI) && TLI->has(LibFunc_bcmp)) {
+ return emitBCmp(LHS, RHS, Size, B, DL, TLI);
+ }
return nullptr;
}
@@ -1137,10 +1150,10 @@ static Value *optimizeTrigReflections(Ca
IRBuilder<> &B) {
if (!isa<FPMathOperator>(Call))
return nullptr;
-
+
IRBuilder<>::FastMathFlagGuard Guard(B);
B.setFastMathFlags(Call->getFastMathFlags());
-
+
// TODO: Can this be shared to also handle LLVM intrinsics?
Value *X;
switch (Func) {
Modified: llvm/trunk/test/CodeGen/X86/memcmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/memcmp.ll?rev=355672&r1=355671&r2=355672&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/memcmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/memcmp.ll Fri Mar 8 01:07:45 2019
@@ -1340,3 +1340,70 @@ define i32 @huge_length(i8* %X, i8* %Y)
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 9223372036854775807) nounwind
ret i32 %m
}
+
+define i1 @huge_length_eq(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: huge_length_eq:
+; X86: # %bb.0:
+; X86-NEXT: pushl $2147483647 # imm = 0x7FFFFFFF
+; X86-NEXT: pushl $-1
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-LABEL: huge_length_eq:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movabsq $9223372036854775807, %rdx # imm = 0x7FFFFFFFFFFFFFFF
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: sete %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 9223372036854775807) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
+
+; This checks non-constant sizes.
+define i32 @nonconst_length(i8* %X, i8* %Y, i64 %size) nounwind {
+; X86-LABEL: nonconst_length:
+; X86: # %bb.0:
+; X86-NEXT: jmp memcmp # TAILCALL
+;
+; X64-LABEL: nonconst_length:
+; X64: # %bb.0:
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 %size) nounwind
+ ret i32 %m
+}
+
+define i1 @nonconst_length_eq(i8* %X, i8* %Y, i64 %size) nounwind {
+; X86-LABEL: nonconst_length_eq:
+; X86: # %bb.0:
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll memcmp
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-LABEL: nonconst_length_eq:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: sete %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 %size) nounwind
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
+}
Modified: llvm/trunk/test/Transforms/InferFunctionAttrs/annotate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InferFunctionAttrs/annotate.ll?rev=355672&r1=355671&r2=355672&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InferFunctionAttrs/annotate.ll (original)
+++ llvm/trunk/test/Transforms/InferFunctionAttrs/annotate.ll Fri Mar 8 01:07:45 2019
@@ -1,5 +1,5 @@
-; RUN: opt < %s -mtriple=x86_64-- -inferattrs -S | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-- -passes=inferattrs -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-- -inferattrs -S | FileCheck -check-prefix=CHECK-UNKNOWN %s
+; RUN: opt < %s -mtriple=x86_64-- -passes=inferattrs -S | FileCheck -check-prefix=CHECK-UNKNOWN %s
; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -inferattrs -S | FileCheck -check-prefix=CHECK -check-prefix=CHECK-DARWIN %s
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -inferattrs -S | FileCheck -check-prefix=CHECK -check-prefix=CHECK-LINUX %s
; RUN: opt < %s -mtriple=nvptx -inferattrs -S | FileCheck -check-prefix=CHECK-NVPTX %s
@@ -241,7 +241,10 @@ declare i64 @atol(i8*)
; CHECK: declare i64 @atoll(i8* nocapture) [[G1]]
declare i64 @atoll(i8*)
-; CHECK: declare i32 @bcmp(i8* nocapture, i8* nocapture, i64) [[G1]]
+; CHECK-DARWIN: declare i32 @bcmp(i8* nocapture, i8* nocapture, i64) [[G1]]
+; CHECK-LINUX: declare i32 @bcmp(i8* nocapture, i8* nocapture, i64) [[G1]]
+; CHECK-UNKNOWN-NOT: declare i32 @bcmp(i8* nocapture, i8* nocapture, i64) [[G1]]
+; CHECK-NVPTX-NOT: declare i32 @bcmp(i8* nocapture, i8* nocapture, i64) [[G1]]
declare i32 @bcmp(i8*, i8*, i64)
; CHECK: declare void @bcopy(i8* nocapture readonly, i8* nocapture, i64) [[G0]]
Modified: llvm/trunk/test/Transforms/InstCombine/memcmp-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/memcmp-1.ll?rev=355672&r1=355671&r2=355672&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/memcmp-1.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/memcmp-1.ll Fri Mar 8 01:07:45 2019
@@ -1,6 +1,7 @@
; Test that the memcmp library call simplifier works correctly.
;
-; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -instcombine -S | FileCheck --check-prefix=CHECK --check-prefix=NOBCMP %s
+; RUN: opt < %s -instcombine -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefix=CHECK --check-prefix=BCMP %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32:64"
@@ -130,3 +131,21 @@ define i1 @test_simplify9(i16 %x, i16 %y
%cmp = icmp eq i32 %call, 0
ret i1 %cmp
}
+
+; Check memcmp(mem1, mem2, size)==0 -> bcmp(mem1, mem2, size)==0
+
+define i1 @test_simplify10(i8* %mem1, i8* %mem2, i32 %size) {
+; NOBCMP-LABEL: @test_simplify10(
+; NOBCMP-NEXT: [[CALL:%.*]] = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 %size)
+; NOBCMP-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; NOBCMP-NEXT: ret i1 [[CMP]]
+;
+; BCMP-LABEL: @test_simplify10(
+; BCMP-NEXT: [[CALL:%.*]] = call i32 @bcmp(i8* %mem1, i8* %mem2, i32 %size)
+; BCMP-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; BCMP-NEXT: ret i1 [[CMP]]
+;
+ %call = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 %size)
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
Modified: llvm/trunk/test/Transforms/InstCombine/strcmp-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/strcmp-1.ll?rev=355672&r1=355671&r2=355672&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/strcmp-1.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/strcmp-1.ll Fri Mar 8 01:07:45 2019
@@ -1,5 +1,6 @@
; Test that the strcmp library call simplifier works correctly.
-; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -instcombine -S | FileCheck %s --check-prefix=NOBCMP
+; RUN: opt < %s -instcombine -mtriple=unknown-unknown-linux-gnu -S | FileCheck %s --check-prefix=BCMP
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
@@ -80,3 +81,24 @@ define i32 @test6(i8* %str) {
%temp1 = call i32 @strcmp(i8* %str, i8* %str)
ret i32 %temp1
}
+
+; strcmp(x, y) == 0 -> bcmp(x, y, <known length>) == 0
+define i1 @test7(i1 %b) {
+; BCMP-LABEL: @test7(
+; BCMP: %bcmp = call i32 @bcmp(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @hello, i32 0, i32 0), i8* %str2, i32 5)
+; BCMP: %res = icmp eq i32 %bcmp, 0
+; BCMP: ret i1 %res
+
+; NOBCMP-LABEL: @test7(
+; NOBCMP: %memcmp = call i32 @memcmp(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @hello, i32 0, i32 0), i8* %str2, i32 5)
+; NOBCMP: %res = icmp eq i32 %memcmp, 0
+; NOBCMP: ret i1 %res
+
+ %str1 = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+ %temp1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
+ %temp2 = getelementptr inbounds [5 x i8], [5 x i8]* @bell, i32 0, i32 0
+ %str2 = select i1 %b, i8* %temp1, i8* %temp2
+ %temp3 = call i32 @strcmp(i8* %str1, i8* %str2)
+ %res = icmp eq i32 %temp3, 0
+ ret i1 %res
+}
More information about the llvm-commits
mailing list