[llvm] 5d3fd3d - [RISCV] Make ctlz/cttz cheap to speculatively execute so CodeGenPrepare won't insert a zero check.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 9 10:14:25 PST 2020
Author: Craig Topper
Date: 2020-11-09T10:13:45-08:00
New Revision: 5d3fd3df94e5463f8460fbc1a643e663b0e6cb2b
URL: https://github.com/llvm/llvm-project/commit/5d3fd3df94e5463f8460fbc1a643e663b0e6cb2b
DIFF: https://github.com/llvm/llvm-project/commit/5d3fd3df94e5463f8460fbc1a643e663b0e6cb2b.diff
LOG: [RISCV] Make ctlz/cttz cheap to speculatively execute so CodeGenPrepare won't insert a zero check.
Add additional isel patterns for ctzw/clzw instructions.
Differential Revision: https://reviews.llvm.org/D91040
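With Zbb, clz/ctz (and clzw/ctzw) are fully defined for a zero input: they return the operand width. Returning true from isCheapToSpeculateCtlz/isCheapToSpeculateCttz therefore lets the intrinsic be selected directly instead of being despeculated. As a rough sketch (illustrative value and block names, not taken from this patch), CodeGenPrepare would otherwise turn

  %z = call i64 @llvm.cttz.i64(i64 %a, i1 false)

into something like

  declare i64 @llvm.cttz.i64(i64, i1)

  define i64 @cttz_i64(i64 %a) nounwind {
  entry:
    %cmpz = icmp eq i64 %a, 0
    br i1 %cmpz, label %cond.end, label %cond.false
  cond.false:
    %v = call i64 @llvm.cttz.i64(i64 %a, i1 true)
    br label %cond.end
  cond.end:
    %z = phi i64 [ 64, %entry ], [ %v, %cond.false ]
    ret i64 %z
  }

which is where the beqz/addi sequences removed from the tests below came from.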
Added:

Modified:
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/lib/Target/RISCV/RISCVISelLowering.h
    llvm/lib/Target/RISCV/RISCVInstrInfoB.td
    llvm/test/CodeGen/RISCV/rv32Zbb.ll
    llvm/test/CodeGen/RISCV/rv64Zbb.ll

Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 76fd93830aa5..8d5e3a2a254c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -368,6 +368,14 @@ bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}
+bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
+ return Subtarget.hasStdExtZbb();
+}
+
+bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
+ return Subtarget.hasStdExtZbb();
+}
+
bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const {
if (VT == MVT::f32 && !Subtarget.hasStdExtF())
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index e420e879efc9..53e86f75865d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -74,6 +74,8 @@ class RISCVTargetLowering : public TargetLowering {
bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
bool isZExtFree(SDValue Val, EVT VT2) const override;
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
+ bool isCheapToSpeculateCttz() const override;
+ bool isCheapToSpeculateCtlz() const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
index c57ba326088d..d327781277ce 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
@@ -1037,8 +1037,13 @@ def : Pat<(sext_inreg (fshl GPR:$rs3, (shl GPR:$rs1, (i64 32)),
let Predicates = [HasStdExtZbb, IsRV64] in {
def : Pat<(add (ctlz (and GPR:$rs1, (i64 0xFFFFFFFF))), (i64 -32)),
(CLZW GPR:$rs1)>;
-// We don't pattern-match CTZW here as it has the same pattern and result as
-// RV64 CTZ
+// computeKnownBits can't figure out that the and mask on the add result is
+// unnecessary so we need to pattern match it away.
+def : Pat<(and (add (ctlz (and GPR:$rs1, (i64 0xFFFFFFFF))), (i64 -32)),
+ (i64 0xFFFFFFFF)),
+ (CLZW GPR:$rs1)>;
+def : Pat<(cttz (or GPR:$rs1, (i64 0x100000000))),
+ (CTZW GPR:$rs1)>;
def : Pat<(ctpop (and GPR:$rs1, (i64 0xFFFFFFFF))), (PCNTW GPR:$rs1)>;
} // Predicates = [HasStdExtZbb, IsRV64]
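Both new patterns cover the DAG shapes an i32 count-zeros takes after being promoted to i64 on RV64: i32 ctlz becomes (add (ctlz (and X, 0xFFFFFFFF)), (i64 -32)), and i32 cttz becomes (cttz (or X, (i64 0x100000000))), where the set bit 32 caps the count at 32, matching ctzw. One way the extra and mask shows up is when the ctlz result is zero-extended back to 64 bits. A hypothetical IR function (not part of this patch) that would produce that shape, assuming the usual i32-to-i64 promotion:

  declare i32 @llvm.ctlz.i32(i32, i1)

  define i64 @ctlz_i32_zext(i32 signext %a) nounwind {
    %b = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
    %c = zext i32 %b to i64
    ret i64 %c
  }

Because clzw only produces values in [0, 32], the outer (and ..., 0xFFFFFFFF) is redundant, but as the comment above notes, computeKnownBits cannot prove that, so the pattern matches it away.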
diff --git a/llvm/test/CodeGen/RISCV/rv32Zbb.ll b/llvm/test/CodeGen/RISCV/rv32Zbb.ll
index c6c771c18497..8da1fe64c65f 100644
--- a/llvm/test/CodeGen/RISCV/rv32Zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32Zbb.ll
@@ -362,22 +362,12 @@ define i32 @ctlz_i32(i32 %a) nounwind {
;
; RV32IB-LABEL: ctlz_i32:
; RV32IB: # %bb.0:
-; RV32IB-NEXT: beqz a0, .LBB8_2
-; RV32IB-NEXT: # %bb.1: # %cond.false
; RV32IB-NEXT: clz a0, a0
; RV32IB-NEXT: ret
-; RV32IB-NEXT: .LBB8_2:
-; RV32IB-NEXT: addi a0, zero, 32
-; RV32IB-NEXT: ret
;
; RV32IBB-LABEL: ctlz_i32:
; RV32IBB: # %bb.0:
-; RV32IBB-NEXT: beqz a0, .LBB8_2
-; RV32IBB-NEXT: # %bb.1: # %cond.false
; RV32IBB-NEXT: clz a0, a0
-; RV32IBB-NEXT: ret
-; RV32IBB-NEXT: .LBB8_2:
-; RV32IBB-NEXT: addi a0, zero, 32
; RV32IBB-NEXT: ret
%1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
ret i32 %1
@@ -545,22 +535,12 @@ define i32 @cttz_i32(i32 %a) nounwind {
;
; RV32IB-LABEL: cttz_i32:
; RV32IB: # %bb.0:
-; RV32IB-NEXT: beqz a0, .LBB10_2
-; RV32IB-NEXT: # %bb.1: # %cond.false
; RV32IB-NEXT: ctz a0, a0
; RV32IB-NEXT: ret
-; RV32IB-NEXT: .LBB10_2:
-; RV32IB-NEXT: addi a0, zero, 32
-; RV32IB-NEXT: ret
;
; RV32IBB-LABEL: cttz_i32:
; RV32IBB: # %bb.0:
-; RV32IBB-NEXT: beqz a0, .LBB10_2
-; RV32IBB-NEXT: # %bb.1: # %cond.false
; RV32IBB-NEXT: ctz a0, a0
-; RV32IBB-NEXT: ret
-; RV32IBB-NEXT: .LBB10_2:
-; RV32IBB-NEXT: addi a0, zero, 32
; RV32IBB-NEXT: ret
%1 = call i32 @llvm.cttz.i32(i32 %a, i1 false)
ret i32 %1
diff --git a/llvm/test/CodeGen/RISCV/rv64Zbb.ll b/llvm/test/CodeGen/RISCV/rv64Zbb.ll
index d7f0548e9658..a1d0b8a74b26 100644
--- a/llvm/test/CodeGen/RISCV/rv64Zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64Zbb.ll
@@ -290,22 +290,12 @@ define signext i32 @ctlz_i32(i32 signext %a) nounwind {
;
; RV64IB-LABEL: ctlz_i32:
; RV64IB: # %bb.0:
-; RV64IB-NEXT: beqz a0, .LBB9_2
-; RV64IB-NEXT: # %bb.1: # %cond.false
; RV64IB-NEXT: clzw a0, a0
; RV64IB-NEXT: ret
-; RV64IB-NEXT: .LBB9_2:
-; RV64IB-NEXT: addi a0, zero, 32
-; RV64IB-NEXT: ret
;
; RV64IBB-LABEL: ctlz_i32:
; RV64IBB: # %bb.0:
-; RV64IBB-NEXT: beqz a0, .LBB9_2
-; RV64IBB-NEXT: # %bb.1: # %cond.false
; RV64IBB-NEXT: clzw a0, a0
-; RV64IBB-NEXT: ret
-; RV64IBB-NEXT: .LBB9_2:
-; RV64IBB-NEXT: addi a0, zero, 32
; RV64IBB-NEXT: ret
%1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
ret i32 %1
@@ -385,22 +375,12 @@ define i64 @ctlz_i64(i64 %a) nounwind {
;
; RV64IB-LABEL: ctlz_i64:
; RV64IB: # %bb.0:
-; RV64IB-NEXT: beqz a0, .LBB10_2
-; RV64IB-NEXT: # %bb.1: # %cond.false
; RV64IB-NEXT: clz a0, a0
; RV64IB-NEXT: ret
-; RV64IB-NEXT: .LBB10_2:
-; RV64IB-NEXT: addi a0, zero, 64
-; RV64IB-NEXT: ret
;
; RV64IBB-LABEL: ctlz_i64:
; RV64IBB: # %bb.0:
-; RV64IBB-NEXT: beqz a0, .LBB10_2
-; RV64IBB-NEXT: # %bb.1: # %cond.false
; RV64IBB-NEXT: clz a0, a0
-; RV64IBB-NEXT: ret
-; RV64IBB-NEXT: .LBB10_2:
-; RV64IBB-NEXT: addi a0, zero, 64
; RV64IBB-NEXT: ret
%1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
ret i64 %1
@@ -470,22 +450,12 @@ define signext i32 @cttz_i32(i32 signext %a) nounwind {
;
; RV64IB-LABEL: cttz_i32:
; RV64IB: # %bb.0:
-; RV64IB-NEXT: beqz a0, .LBB11_2
-; RV64IB-NEXT: # %bb.1: # %cond.false
-; RV64IB-NEXT: ctz a0, a0
-; RV64IB-NEXT: ret
-; RV64IB-NEXT: .LBB11_2:
-; RV64IB-NEXT: addi a0, zero, 32
+; RV64IB-NEXT: ctzw a0, a0
; RV64IB-NEXT: ret
;
; RV64IBB-LABEL: cttz_i32:
; RV64IBB: # %bb.0:
-; RV64IBB-NEXT: beqz a0, .LBB11_2
-; RV64IBB-NEXT: # %bb.1: # %cond.false
-; RV64IBB-NEXT: ctz a0, a0
-; RV64IBB-NEXT: ret
-; RV64IBB-NEXT: .LBB11_2:
-; RV64IBB-NEXT: addi a0, zero, 32
+; RV64IBB-NEXT: ctzw a0, a0
; RV64IBB-NEXT: ret
%1 = call i32 @llvm.cttz.i32(i32 %a, i1 false)
ret i32 %1
@@ -555,22 +525,12 @@ define i64 @cttz_i64(i64 %a) nounwind {
;
; RV64IB-LABEL: cttz_i64:
; RV64IB: # %bb.0:
-; RV64IB-NEXT: beqz a0, .LBB12_2
-; RV64IB-NEXT: # %bb.1: # %cond.false
; RV64IB-NEXT: ctz a0, a0
; RV64IB-NEXT: ret
-; RV64IB-NEXT: .LBB12_2:
-; RV64IB-NEXT: addi a0, zero, 64
-; RV64IB-NEXT: ret
;
; RV64IBB-LABEL: cttz_i64:
; RV64IBB: # %bb.0:
-; RV64IBB-NEXT: beqz a0, .LBB12_2
-; RV64IBB-NEXT: # %bb.1: # %cond.false
; RV64IBB-NEXT: ctz a0, a0
-; RV64IBB-NEXT: ret
-; RV64IBB-NEXT: .LBB12_2:
-; RV64IBB-NEXT: addi a0, zero, 64
; RV64IBB-NEXT: ret
%1 = call i64 @llvm.cttz.i64(i64 %a, i1 false)
ret i64 %1