[PATCH] D87988: [SystemZ] Optimize bcmp calls (PR47420)
Dávid Bolvanský via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 23 13:01:18 PDT 2020
xbolva00 updated this revision to Diff 293839.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D87988/new/
https://reviews.llvm.org/D87988
Files:
llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
llvm/test/CodeGen/SystemZ/bcmp.ll
Index: llvm/test/CodeGen/SystemZ/bcmp.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/SystemZ/bcmp.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare signext i32 @bcmp(i8* nocapture, i8* nocapture, i64)
+
+define zeroext i1 @test_bcmp_eq_0(i8* nocapture readonly %A, i8* nocapture readonly %B) {
+; CHECK-LABEL: test_bcmp_eq_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: clc 0(2,%r3), 0(%r2)
+; CHECK-NEXT: ipm %r0
+; CHECK-NEXT: afi %r0, -268435456
+; CHECK-NEXT: risbg %r2, %r0, 63, 191, 33
+; CHECK-NEXT: br %r14
+ %c = tail call signext i32 @bcmp(i8* %A, i8* %B, i64 2)
+ %res = icmp eq i32 %c, 0
+ ret i1 %res
+}
+
+define signext i32 @test_bcmp(i8* nocapture readonly %A, i8* nocapture readonly %B) {
+; CHECK-LABEL: test_bcmp:
+; CHECK: # %bb.0:
+; CHECK-NEXT: clc 0(2,%r3), 0(%r2)
+; CHECK-NEXT: ipm %r0
+; CHECK-NEXT: sllg %r0, %r0, 34
+; CHECK-NEXT: srag %r2, %r0, 62
+; CHECK-NEXT: br %r14
+ %res = tail call signext i32 @bcmp(i8* %A, i8* %B, i64 2)
+ ret i32 %res
+}
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -740,7 +740,7 @@
void visitFence(const FenceInst &I);
void visitPHI(const PHINode &I);
void visitCall(const CallInst &I);
- bool visitMemCmpCall(const CallInst &I);
+ bool visitMemCmpBCmpCall(const CallInst &I);
bool visitMemPCpyCall(const CallInst &I);
bool visitMemChrCall(const CallInst &I);
bool visitStrCpyCall(const CallInst &I, bool isStpcpy);
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7271,12 +7271,12 @@
setValue(&I, Value);
}
-/// See if we can lower a memcmp call into an optimized form. If so, return
+/// See if we can lower a memcmp/bcmp call into an optimized form. If so, return
/// true and lower it. Otherwise return false, and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
-bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
+bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallInst &I) {
const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
const Value *Size = I.getArgOperand(2);
const ConstantInt *CSize = dyn_cast<ConstantInt>(Size);
@@ -7587,6 +7587,10 @@
LibInfo->hasOptimizedCodeGen(Func)) {
switch (Func) {
default: break;
+ case LibFunc_bcmp:
+ if (visitMemCmpBCmpCall(I))
+ return;
+ break;
case LibFunc_copysign:
case LibFunc_copysignf:
case LibFunc_copysignl:
@@ -7688,7 +7692,7 @@
return;
break;
case LibFunc_memcmp:
- if (visitMemCmpCall(I))
+ if (visitMemCmpBCmpCall(I))
return;
break;
case LibFunc_mempcpy:
Index: llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h
===================================================================
--- llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h
+++ llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h
@@ -85,7 +85,7 @@
return SDValue();
}
- /// Emit target-specific code that performs a memcmp, in cases where that is
+ /// Emit target-specific code that performs a memcmp/bcmp, in cases where that is
/// faster than a libcall. The first returned SDValue is the result of the
/// memcmp and the second is the chain. Both SDValues can be null if a normal
/// libcall should be used.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D87988.293839.patch
Type: text/x-patch
Size: 3929 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200923/9cc2a872/attachment.bin>
More information about the llvm-commits mailing list