[llvm] 5d3fd3d - [RISCV] Make ctlz/cttz cheap to speculatively execute so CodeGenPrepare won't insert a zero check.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 9 10:14:25 PST 2020
Author: Craig Topper
Date: 2020-11-09T10:13:45-08:00
New Revision: 5d3fd3df94e5463f8460fbc1a643e663b0e6cb2b
URL: https://github.com/llvm/llvm-project/commit/5d3fd3df94e5463f8460fbc1a643e663b0e6cb2b
DIFF: https://github.com/llvm/llvm-project/commit/5d3fd3df94e5463f8460fbc1a643e663b0e6cb2b.diff
LOG: [RISCV] Make ctlz/cttz cheap to speculatively execute so CodeGenPrepare won't insert a zero check.
Add additional isel patterns for ctzw/clzw instructions.
Differential Revision: https://reviews.llvm.org/D91040
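With Zbb, clz/ctz (and clzw/ctzw) are fully defined for a zero input: they return the operand width. Returning true from isCheapToSpeculateCtlz/isCheapToSpeculateCttz therefore lets the intrinsic be selected directly instead of being despeculated. As a rough sketch (illustrative value and block names, not taken from this patch), CodeGenPrepare would otherwise turn

  %z = call i64 @llvm.cttz.i64(i64 %a, i1 false)

into something like

  declare i64 @llvm.cttz.i64(i64, i1)

  define i64 @cttz_i64(i64 %a) nounwind {
  entry:
    %cmpz = icmp eq i64 %a, 0
    br i1 %cmpz, label %cond.end, label %cond.false
  cond.false:
    %v = call i64 @llvm.cttz.i64(i64 %a, i1 true)
    br label %cond.end
  cond.end:
    %z = phi i64 [ 64, %entry ], [ %v, %cond.false ]
    ret i64 %z
  }

which is where the beqz/addi sequences removed from the tests below came from.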
Added:

Modified:
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/lib/Target/RISCV/RISCVISelLowering.h
    llvm/lib/Target/RISCV/RISCVInstrInfoB.td
    llvm/test/CodeGen/RISCV/rv32Zbb.ll
    llvm/test/CodeGen/RISCV/rv64Zbb.ll

Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 76fd93830aa5..8d5e3a2a254c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -368,6 +368,14 @@ bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}
+bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
+ return Subtarget.hasStdExtZbb();
+}
+
+bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
+ return Subtarget.hasStdExtZbb();
+}
+
bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const {
if (VT == MVT::f32 && !Subtarget.hasStdExtF())
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index e420e879efc9..53e86f75865d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -74,6 +74,8 @@ class RISCVTargetLowering : public TargetLowering {
bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
bool isZExtFree(SDValue Val, EVT VT2) const override;
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
+ bool isCheapToSpeculateCttz() const override;
+ bool isCheapToSpeculateCtlz() const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
index c57ba326088d..d327781277ce 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
@@ -1037,8 +1037,13 @@ def : Pat<(sext_inreg (fshl GPR:$rs3, (shl GPR:$rs1, (i64 32)),
let Predicates = [HasStdExtZbb, IsRV64] in {
def : Pat<(add (ctlz (and GPR:$rs1, (i64 0xFFFFFFFF))), (i64 -32)),
(CLZW GPR:$rs1)>;
-// We don't pattern-match CTZW here as it has the same pattern and result as
-// RV64 CTZ
+// computeKnownBits can't figure out that the and mask on the add result is
+// unnecessary so we need to pattern match it away.
+def : Pat<(and (add (ctlz (and GPR:$rs1, (i64 0xFFFFFFFF))), (i64 -32)),
+ (i64 0xFFFFFFFF)),
+ (CLZW GPR:$rs1)>;
+def : Pat<(cttz (or GPR:$rs1, (i64 0x100000000))),
+ (CTZW GPR:$rs1)>;
def : Pat<(ctpop (and GPR:$rs1, (i64 0xFFFFFFFF))), (PCNTW GPR:$rs1)>;
} // Predicates = [HasStdExtZbb, IsRV64]
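Both new patterns cover the DAG shapes an i32 count-zeros takes after being promoted to i64 on RV64: i32 ctlz becomes (add (ctlz (and X, 0xFFFFFFFF)), (i64 -32)), and i32 cttz becomes (cttz (or X, (i64 0x100000000))), where the set bit 32 caps the count at 32, matching ctzw. One way the extra and mask shows up is when the ctlz result is zero-extended back to 64 bits. A hypothetical IR function (not part of this patch) that would produce that shape, assuming the usual i32-to-i64 promotion:

  declare i32 @llvm.ctlz.i32(i32, i1)

  define i64 @ctlz_i32_zext(i32 signext %a) nounwind {
    %b = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
    %c = zext i32 %b to i64
    ret i64 %c
  }

Because clzw only produces values in [0, 32], the outer (and ..., 0xFFFFFFFF) is redundant, but as the comment above notes, computeKnownBits cannot prove that, so the pattern matches it away.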
diff --git a/llvm/test/CodeGen/RISCV/rv32Zbb.ll b/llvm/test/CodeGen/RISCV/rv32Zbb.ll
index c6c771c18497..8da1fe64c65f 100644
--- a/llvm/test/CodeGen/RISCV/rv32Zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32Zbb.ll
@@ -362,22 +362,12 @@ define i32 @ctlz_i32(i32 %a) nounwind {
;
; RV32IB-LABEL: ctlz_i32:
; RV32IB: # %bb.0:
-; RV32IB-NEXT: beqz a0, .LBB8_2
-; RV32IB-NEXT: # %bb.1: # %cond.false
; RV32IB-NEXT: clz a0, a0
; RV32IB-NEXT: ret
-; RV32IB-NEXT: .LBB8_2:
-; RV32IB-NEXT: addi a0, zero, 32
-; RV32IB-NEXT: ret
;
; RV32IBB-LABEL: ctlz_i32:
; RV32IBB: # %bb.0:
-; RV32IBB-NEXT: beqz a0, .LBB8_2
-; RV32IBB-NEXT: # %bb.1: # %cond.false
; RV32IBB-NEXT: clz a0, a0
-; RV32IBB-NEXT: ret
-; RV32IBB-NEXT: .LBB8_2:
-; RV32IBB-NEXT: addi a0, zero, 32
; RV32IBB-NEXT: ret
%1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
ret i32 %1
@@ -545,22 +535,12 @@ define i32 @cttz_i32(i32 %a) nounwind {
;
; RV32IB-LABEL: cttz_i32:
; RV32IB: # %bb.0:
-; RV32IB-NEXT: beqz a0, .LBB10_2
-; RV32IB-NEXT: # %bb.1: # %cond.false
; RV32IB-NEXT: ctz a0, a0
; RV32IB-NEXT: ret
-; RV32IB-NEXT: .LBB10_2:
-; RV32IB-NEXT: addi a0, zero, 32
-; RV32IB-NEXT: ret
;
; RV32IBB-LABEL: cttz_i32:
; RV32IBB: # %bb.0:
-; RV32IBB-NEXT: beqz a0, .LBB10_2
-; RV32IBB-NEXT: # %bb.1: # %cond.false
; RV32IBB-NEXT: ctz a0, a0
-; RV32IBB-NEXT: ret
-; RV32IBB-NEXT: .LBB10_2:
-; RV32IBB-NEXT: addi a0, zero, 32
; RV32IBB-NEXT: ret
%1 = call i32 @llvm.cttz.i32(i32 %a, i1 false)
ret i32 %1
diff --git a/llvm/test/CodeGen/RISCV/rv64Zbb.ll b/llvm/test/CodeGen/RISCV/rv64Zbb.ll
index d7f0548e9658..a1d0b8a74b26 100644
--- a/llvm/test/CodeGen/RISCV/rv64Zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64Zbb.ll
@@ -290,22 +290,12 @@ define signext i32 @ctlz_i32(i32 signext %a) nounwind {
;
; RV64IB-LABEL: ctlz_i32:
; RV64IB: # %bb.0:
-; RV64IB-NEXT: beqz a0, .LBB9_2
-; RV64IB-NEXT: # %bb.1: # %cond.false
; RV64IB-NEXT: clzw a0, a0
; RV64IB-NEXT: ret
-; RV64IB-NEXT: .LBB9_2:
-; RV64IB-NEXT: addi a0, zero, 32
-; RV64IB-NEXT: ret
;
; RV64IBB-LABEL: ctlz_i32:
; RV64IBB: # %bb.0:
-; RV64IBB-NEXT: beqz a0, .LBB9_2
-; RV64IBB-NEXT: # %bb.1: # %cond.false
; RV64IBB-NEXT: clzw a0, a0
-; RV64IBB-NEXT: ret
-; RV64IBB-NEXT: .LBB9_2:
-; RV64IBB-NEXT: addi a0, zero, 32
; RV64IBB-NEXT: ret
%1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
ret i32 %1
@@ -385,22 +375,12 @@ define i64 @ctlz_i64(i64 %a) nounwind {
;
; RV64IB-LABEL: ctlz_i64:
; RV64IB: # %bb.0:
-; RV64IB-NEXT: beqz a0, .LBB10_2
-; RV64IB-NEXT: # %bb.1: # %cond.false
; RV64IB-NEXT: clz a0, a0
; RV64IB-NEXT: ret
-; RV64IB-NEXT: .LBB10_2:
-; RV64IB-NEXT: addi a0, zero, 64
-; RV64IB-NEXT: ret
;
; RV64IBB-LABEL: ctlz_i64:
; RV64IBB: # %bb.0:
-; RV64IBB-NEXT: beqz a0, .LBB10_2
-; RV64IBB-NEXT: # %bb.1: # %cond.false
; RV64IBB-NEXT: clz a0, a0
-; RV64IBB-NEXT: ret
-; RV64IBB-NEXT: .LBB10_2:
-; RV64IBB-NEXT: addi a0, zero, 64
; RV64IBB-NEXT: ret
%1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
ret i64 %1
@@ -470,22 +450,12 @@ define signext i32 @cttz_i32(i32 signext %a) nounwind {
;
; RV64IB-LABEL: cttz_i32:
; RV64IB: # %bb.0:
-; RV64IB-NEXT: beqz a0, .LBB11_2
-; RV64IB-NEXT: # %bb.1: # %cond.false
-; RV64IB-NEXT: ctz a0, a0
-; RV64IB-NEXT: ret
-; RV64IB-NEXT: .LBB11_2:
-; RV64IB-NEXT: addi a0, zero, 32
+; RV64IB-NEXT: ctzw a0, a0
; RV64IB-NEXT: ret
;
; RV64IBB-LABEL: cttz_i32:
; RV64IBB: # %bb.0:
-; RV64IBB-NEXT: beqz a0, .LBB11_2
-; RV64IBB-NEXT: # %bb.1: # %cond.false
-; RV64IBB-NEXT: ctz a0, a0
-; RV64IBB-NEXT: ret
-; RV64IBB-NEXT: .LBB11_2:
-; RV64IBB-NEXT: addi a0, zero, 32
+; RV64IBB-NEXT: ctzw a0, a0
; RV64IBB-NEXT: ret
%1 = call i32 @llvm.cttz.i32(i32 %a, i1 false)
ret i32 %1
@@ -555,22 +525,12 @@ define i64 @cttz_i64(i64 %a) nounwind {
;
; RV64IB-LABEL: cttz_i64:
; RV64IB: # %bb.0:
-; RV64IB-NEXT: beqz a0, .LBB12_2
-; RV64IB-NEXT: # %bb.1: # %cond.false
; RV64IB-NEXT: ctz a0, a0
; RV64IB-NEXT: ret
-; RV64IB-NEXT: .LBB12_2:
-; RV64IB-NEXT: addi a0, zero, 64
-; RV64IB-NEXT: ret
;
; RV64IBB-LABEL: cttz_i64:
; RV64IBB: # %bb.0:
-; RV64IBB-NEXT: beqz a0, .LBB12_2
-; RV64IBB-NEXT: # %bb.1: # %cond.false
; RV64IBB-NEXT: ctz a0, a0
-; RV64IBB-NEXT: ret
-; RV64IBB-NEXT: .LBB12_2:
-; RV64IBB-NEXT: addi a0, zero, 64
; RV64IBB-NEXT: ret
%1 = call i64 @llvm.cttz.i64(i64 %a, i1 false)
ret i64 %1