[llvm] [TargetLowering][RISCV] Introduce shouldFoldSelectWithSingleBitTest and RISC-V implement. (PR #72978)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 21 03:34:54 PST 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Yeting Kuo (yetingk)
<details>
<summary>Changes</summary>
DAGCombiner folds (select_cc seteq (and x, y), 0, 0, A) to (and (sra (shl x)) A) where y has a single bit set. Previously, DAGCombiner relied on `shouldAvoidTransformToShift` to decide when to do the combine, but `shouldAvoidTransformToShift` is only about shift cost. This patch introduces a specific hook to decide when to do the combine, and disables the combine when Zicond (or XVentanaCondOps) is enabled and the AndMask is <= 1024 (i.e. the mask fits in an `andi` immediate, so the conditional-move sequence is cheaper).
---
Full diff: https://github.com/llvm/llvm-project/pull/72978.diff
6 Files Affected:
- (modified) llvm/include/llvm/CodeGen/TargetLowering.h (+8)
- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+2-2)
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+7)
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.h (+3)
- (modified) llvm/test/CodeGen/RISCV/condops.ll (+100)
- (modified) llvm/test/CodeGen/RISCV/select.ll (+123-65)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 9ebcc28c38ae672..fbd90a0b75a744e 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3259,6 +3259,14 @@ class TargetLoweringBase {
return false;
}
+ // Should we fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x))
+ // A) where y has a single bit set?
+ virtual bool shouldFoldSelectWithSingleBitTest(EVT VT,
+ const APInt &AndMask) const {
+ unsigned ShCt = AndMask.getBitWidth() - 1;
+ return !shouldAvoidTransformToShift(VT, ShCt);
+ }
+
/// Does this target require the clearing of high-order bits in a register
/// passed to the fp16 to fp conversion library function.
virtual bool shouldKeepZExtForFP16Conv() const { return false; }
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2fc9a2866c32dba..83a1a8b3181cc40 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -27252,8 +27252,8 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
// Shift the tested bit over the sign bit.
const APInt &AndMask = ConstAndRHS->getAPIntValue();
- unsigned ShCt = AndMask.getBitWidth() - 1;
- if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
+ if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
+ unsigned ShCt = AndMask.getBitWidth() - 1;
SDValue ShlAmt =
DAG.getConstant(AndMask.countl_zero(), SDLoc(AndLHS),
getShiftAmountTy(AndLHS.getValueType()));
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 26190337eb3bd1b..d3a1f5a9dd9e00c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -19673,6 +19673,13 @@ RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
return SDValue();
return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
}
+
+bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
+ EVT VT, const APInt &AndMask) const {
+ if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
+ return AndMask.ugt(1024);
+ return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
+}
namespace llvm::RISCVVIntrinsicsTable {
#define GET_RISCVVIntrinsicsTable_IMPL
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 8f3ff4be22a2d1b..e10db7d441ef25e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -959,6 +959,9 @@ class RISCVTargetLowering : public TargetLowering {
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const override;
+
+ bool shouldFoldSelectWithSingleBitTest(EVT VT,
+ const APInt &AndMask) const override;
};
namespace RISCV {
diff --git a/llvm/test/CodeGen/RISCV/condops.ll b/llvm/test/CodeGen/RISCV/condops.ll
index c405b0ae1717847..b9912c6ccfb98cd 100644
--- a/llvm/test/CodeGen/RISCV/condops.ll
+++ b/llvm/test/CodeGen/RISCV/condops.ll
@@ -3533,3 +3533,103 @@ define signext i16 @numsignbits(i16 signext %0, i16 signext %1, i16 signext %2,
}
declare void @bat(i16 signext)
+
+define i64 @single_bit(i64 %x) {
+; RV32I-LABEL: single_bit:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: slli a2, a0, 21
+; RV32I-NEXT: srai a2, a2, 31
+; RV32I-NEXT: and a0, a2, a0
+; RV32I-NEXT: and a1, a2, a1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: single_bit:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: slli a1, a0, 53
+; RV64I-NEXT: srai a1, a1, 63
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: ret
+;
+; RV64XVENTANACONDOPS-LABEL: single_bit:
+; RV64XVENTANACONDOPS: # %bb.0: # %entry
+; RV64XVENTANACONDOPS-NEXT: andi a1, a0, 1024
+; RV64XVENTANACONDOPS-NEXT: vt.maskc a0, a0, a1
+; RV64XVENTANACONDOPS-NEXT: ret
+;
+; RV64XTHEADCONDMOV-LABEL: single_bit:
+; RV64XTHEADCONDMOV: # %bb.0: # %entry
+; RV64XTHEADCONDMOV-NEXT: slli a1, a0, 53
+; RV64XTHEADCONDMOV-NEXT: srai a1, a1, 63
+; RV64XTHEADCONDMOV-NEXT: and a0, a1, a0
+; RV64XTHEADCONDMOV-NEXT: ret
+;
+; RV32ZICOND-LABEL: single_bit:
+; RV32ZICOND: # %bb.0: # %entry
+; RV32ZICOND-NEXT: andi a2, a0, 1024
+; RV32ZICOND-NEXT: czero.eqz a0, a0, a2
+; RV32ZICOND-NEXT: czero.eqz a1, a1, a2
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: single_bit:
+; RV64ZICOND: # %bb.0: # %entry
+; RV64ZICOND-NEXT: andi a1, a0, 1024
+; RV64ZICOND-NEXT: czero.eqz a0, a0, a1
+; RV64ZICOND-NEXT: ret
+entry:
+ %and = and i64 %x, 1024
+ %tobool.not = icmp eq i64 %and, 0
+ %cond = select i1 %tobool.not, i64 0, i64 %x
+ ret i64 %cond
+}
+
+; Test to fold select with single bit check to (and (sra (shl x))).
+define i64 @single_bit2(i64 %x) {
+; RV32I-LABEL: single_bit2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: slli a2, a0, 20
+; RV32I-NEXT: srai a2, a2, 31
+; RV32I-NEXT: and a0, a2, a0
+; RV32I-NEXT: and a1, a2, a1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: single_bit2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: slli a1, a0, 52
+; RV64I-NEXT: srai a1, a1, 63
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: ret
+;
+; RV64XVENTANACONDOPS-LABEL: single_bit2:
+; RV64XVENTANACONDOPS: # %bb.0: # %entry
+; RV64XVENTANACONDOPS-NEXT: slli a1, a0, 52
+; RV64XVENTANACONDOPS-NEXT: srai a1, a1, 63
+; RV64XVENTANACONDOPS-NEXT: and a0, a1, a0
+; RV64XVENTANACONDOPS-NEXT: ret
+;
+; RV64XTHEADCONDMOV-LABEL: single_bit2:
+; RV64XTHEADCONDMOV: # %bb.0: # %entry
+; RV64XTHEADCONDMOV-NEXT: slli a1, a0, 52
+; RV64XTHEADCONDMOV-NEXT: srai a1, a1, 63
+; RV64XTHEADCONDMOV-NEXT: and a0, a1, a0
+; RV64XTHEADCONDMOV-NEXT: ret
+;
+; RV32ZICOND-LABEL: single_bit2:
+; RV32ZICOND: # %bb.0: # %entry
+; RV32ZICOND-NEXT: slli a2, a0, 20
+; RV32ZICOND-NEXT: srai a2, a2, 31
+; RV32ZICOND-NEXT: and a0, a2, a0
+; RV32ZICOND-NEXT: and a1, a2, a1
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: single_bit2:
+; RV64ZICOND: # %bb.0: # %entry
+; RV64ZICOND-NEXT: slli a1, a0, 52
+; RV64ZICOND-NEXT: srai a1, a1, 63
+; RV64ZICOND-NEXT: and a0, a1, a0
+; RV64ZICOND-NEXT: ret
+entry:
+ %and = and i64 %x, 2048
+ %tobool.not = icmp eq i64 %and, 0
+ %cond = select i1 %tobool.not, i64 0, i64 %x
+ ret i64 %cond
+}
diff --git a/llvm/test/CodeGen/RISCV/select.ll b/llvm/test/CodeGen/RISCV/select.ll
index d4a6e9e9dbb4678..7fa27a307757d09 100644
--- a/llvm/test/CodeGen/RISCV/select.ll
+++ b/llvm/test/CodeGen/RISCV/select.ll
@@ -1,26 +1,42 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECK32,RV32IM %s
-; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECK64,RV64IM %s
-; RUN: llc -mtriple=riscv64 -mattr=+m,+xventanacondops -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECK64,RV64IMXVTCONDOPS %s
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-zicond -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECK32,CHECKZICOND,RV32IMZICOND %s
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-zicond -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECK64,CHECKZICOND,RV64IMZICOND %s
+; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32IM %s
+; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64IM %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+xventanacondops -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64IMXVTCONDOPS %s
+; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-zicond -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECKZICOND,RV32IMZICOND %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-zicond -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECKZICOND,RV64IMZICOND %s
define i16 @select_xor_1(i16 %A, i8 %cond) {
-; CHECK32-LABEL: select_xor_1:
-; CHECK32: # %bb.0: # %entry
-; CHECK32-NEXT: slli a1, a1, 31
-; CHECK32-NEXT: srai a1, a1, 31
-; CHECK32-NEXT: andi a1, a1, 43
-; CHECK32-NEXT: xor a0, a0, a1
-; CHECK32-NEXT: ret
-;
-; CHECK64-LABEL: select_xor_1:
-; CHECK64: # %bb.0: # %entry
-; CHECK64-NEXT: slli a1, a1, 63
-; CHECK64-NEXT: srai a1, a1, 63
-; CHECK64-NEXT: andi a1, a1, 43
-; CHECK64-NEXT: xor a0, a0, a1
-; CHECK64-NEXT: ret
+; RV32IM-LABEL: select_xor_1:
+; RV32IM: # %bb.0: # %entry
+; RV32IM-NEXT: slli a1, a1, 31
+; RV32IM-NEXT: srai a1, a1, 31
+; RV32IM-NEXT: andi a1, a1, 43
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: ret
+;
+; RV64IM-LABEL: select_xor_1:
+; RV64IM: # %bb.0: # %entry
+; RV64IM-NEXT: slli a1, a1, 63
+; RV64IM-NEXT: srai a1, a1, 63
+; RV64IM-NEXT: andi a1, a1, 43
+; RV64IM-NEXT: xor a0, a0, a1
+; RV64IM-NEXT: ret
+;
+; RV64IMXVTCONDOPS-LABEL: select_xor_1:
+; RV64IMXVTCONDOPS: # %bb.0: # %entry
+; RV64IMXVTCONDOPS-NEXT: andi a1, a1, 1
+; RV64IMXVTCONDOPS-NEXT: li a2, 43
+; RV64IMXVTCONDOPS-NEXT: vt.maskc a1, a2, a1
+; RV64IMXVTCONDOPS-NEXT: xor a0, a0, a1
+; RV64IMXVTCONDOPS-NEXT: ret
+;
+; CHECKZICOND-LABEL: select_xor_1:
+; CHECKZICOND: # %bb.0: # %entry
+; CHECKZICOND-NEXT: andi a1, a1, 1
+; CHECKZICOND-NEXT: li a2, 43
+; CHECKZICOND-NEXT: czero.eqz a1, a2, a1
+; CHECKZICOND-NEXT: xor a0, a0, a1
+; CHECKZICOND-NEXT: ret
entry:
%and = and i8 %cond, 1
%cmp10 = icmp eq i8 %and, 0
@@ -72,21 +88,35 @@ entry:
}
define i32 @select_xor_2(i32 %A, i32 %B, i8 %cond) {
-; CHECK32-LABEL: select_xor_2:
-; CHECK32: # %bb.0: # %entry
-; CHECK32-NEXT: slli a2, a2, 31
-; CHECK32-NEXT: srai a2, a2, 31
-; CHECK32-NEXT: and a1, a2, a1
-; CHECK32-NEXT: xor a0, a0, a1
-; CHECK32-NEXT: ret
-;
-; CHECK64-LABEL: select_xor_2:
-; CHECK64: # %bb.0: # %entry
-; CHECK64-NEXT: slli a2, a2, 63
-; CHECK64-NEXT: srai a2, a2, 63
-; CHECK64-NEXT: and a1, a2, a1
-; CHECK64-NEXT: xor a0, a0, a1
-; CHECK64-NEXT: ret
+; RV32IM-LABEL: select_xor_2:
+; RV32IM: # %bb.0: # %entry
+; RV32IM-NEXT: slli a2, a2, 31
+; RV32IM-NEXT: srai a2, a2, 31
+; RV32IM-NEXT: and a1, a2, a1
+; RV32IM-NEXT: xor a0, a0, a1
+; RV32IM-NEXT: ret
+;
+; RV64IM-LABEL: select_xor_2:
+; RV64IM: # %bb.0: # %entry
+; RV64IM-NEXT: slli a2, a2, 63
+; RV64IM-NEXT: srai a2, a2, 63
+; RV64IM-NEXT: and a1, a2, a1
+; RV64IM-NEXT: xor a0, a0, a1
+; RV64IM-NEXT: ret
+;
+; RV64IMXVTCONDOPS-LABEL: select_xor_2:
+; RV64IMXVTCONDOPS: # %bb.0: # %entry
+; RV64IMXVTCONDOPS-NEXT: andi a2, a2, 1
+; RV64IMXVTCONDOPS-NEXT: vt.maskc a1, a1, a2
+; RV64IMXVTCONDOPS-NEXT: xor a0, a0, a1
+; RV64IMXVTCONDOPS-NEXT: ret
+;
+; CHECKZICOND-LABEL: select_xor_2:
+; CHECKZICOND: # %bb.0: # %entry
+; CHECKZICOND-NEXT: andi a2, a2, 1
+; CHECKZICOND-NEXT: czero.eqz a1, a1, a2
+; CHECKZICOND-NEXT: xor a0, a0, a1
+; CHECKZICOND-NEXT: ret
entry:
%and = and i8 %cond, 1
%cmp10 = icmp eq i8 %and, 0
@@ -296,21 +326,35 @@ entry:
}
define i32 @select_or(i32 %A, i32 %B, i8 %cond) {
-; CHECK32-LABEL: select_or:
-; CHECK32: # %bb.0: # %entry
-; CHECK32-NEXT: slli a2, a2, 31
-; CHECK32-NEXT: srai a2, a2, 31
-; CHECK32-NEXT: and a1, a2, a1
-; CHECK32-NEXT: or a0, a0, a1
-; CHECK32-NEXT: ret
-;
-; CHECK64-LABEL: select_or:
-; CHECK64: # %bb.0: # %entry
-; CHECK64-NEXT: slli a2, a2, 63
-; CHECK64-NEXT: srai a2, a2, 63
-; CHECK64-NEXT: and a1, a2, a1
-; CHECK64-NEXT: or a0, a0, a1
-; CHECK64-NEXT: ret
+; RV32IM-LABEL: select_or:
+; RV32IM: # %bb.0: # %entry
+; RV32IM-NEXT: slli a2, a2, 31
+; RV32IM-NEXT: srai a2, a2, 31
+; RV32IM-NEXT: and a1, a2, a1
+; RV32IM-NEXT: or a0, a0, a1
+; RV32IM-NEXT: ret
+;
+; RV64IM-LABEL: select_or:
+; RV64IM: # %bb.0: # %entry
+; RV64IM-NEXT: slli a2, a2, 63
+; RV64IM-NEXT: srai a2, a2, 63
+; RV64IM-NEXT: and a1, a2, a1
+; RV64IM-NEXT: or a0, a0, a1
+; RV64IM-NEXT: ret
+;
+; RV64IMXVTCONDOPS-LABEL: select_or:
+; RV64IMXVTCONDOPS: # %bb.0: # %entry
+; RV64IMXVTCONDOPS-NEXT: andi a2, a2, 1
+; RV64IMXVTCONDOPS-NEXT: vt.maskc a1, a1, a2
+; RV64IMXVTCONDOPS-NEXT: or a0, a0, a1
+; RV64IMXVTCONDOPS-NEXT: ret
+;
+; CHECKZICOND-LABEL: select_or:
+; CHECKZICOND: # %bb.0: # %entry
+; CHECKZICOND-NEXT: andi a2, a2, 1
+; CHECKZICOND-NEXT: czero.eqz a1, a1, a2
+; CHECKZICOND-NEXT: or a0, a0, a1
+; CHECKZICOND-NEXT: ret
entry:
%and = and i8 %cond, 1
%cmp10 = icmp eq i8 %and, 0
@@ -360,21 +404,35 @@ entry:
}
define i32 @select_or_1(i32 %A, i32 %B, i32 %cond) {
-; CHECK32-LABEL: select_or_1:
-; CHECK32: # %bb.0: # %entry
-; CHECK32-NEXT: slli a2, a2, 31
-; CHECK32-NEXT: srai a2, a2, 31
-; CHECK32-NEXT: and a1, a2, a1
-; CHECK32-NEXT: or a0, a0, a1
-; CHECK32-NEXT: ret
-;
-; CHECK64-LABEL: select_or_1:
-; CHECK64: # %bb.0: # %entry
-; CHECK64-NEXT: slli a2, a2, 63
-; CHECK64-NEXT: srai a2, a2, 63
-; CHECK64-NEXT: and a1, a2, a1
-; CHECK64-NEXT: or a0, a0, a1
-; CHECK64-NEXT: ret
+; RV32IM-LABEL: select_or_1:
+; RV32IM: # %bb.0: # %entry
+; RV32IM-NEXT: slli a2, a2, 31
+; RV32IM-NEXT: srai a2, a2, 31
+; RV32IM-NEXT: and a1, a2, a1
+; RV32IM-NEXT: or a0, a0, a1
+; RV32IM-NEXT: ret
+;
+; RV64IM-LABEL: select_or_1:
+; RV64IM: # %bb.0: # %entry
+; RV64IM-NEXT: slli a2, a2, 63
+; RV64IM-NEXT: srai a2, a2, 63
+; RV64IM-NEXT: and a1, a2, a1
+; RV64IM-NEXT: or a0, a0, a1
+; RV64IM-NEXT: ret
+;
+; RV64IMXVTCONDOPS-LABEL: select_or_1:
+; RV64IMXVTCONDOPS: # %bb.0: # %entry
+; RV64IMXVTCONDOPS-NEXT: andi a2, a2, 1
+; RV64IMXVTCONDOPS-NEXT: vt.maskc a1, a1, a2
+; RV64IMXVTCONDOPS-NEXT: or a0, a0, a1
+; RV64IMXVTCONDOPS-NEXT: ret
+;
+; CHECKZICOND-LABEL: select_or_1:
+; CHECKZICOND: # %bb.0: # %entry
+; CHECKZICOND-NEXT: andi a2, a2, 1
+; CHECKZICOND-NEXT: czero.eqz a1, a1, a2
+; CHECKZICOND-NEXT: or a0, a0, a1
+; CHECKZICOND-NEXT: ret
entry:
%and = and i32 %cond, 1
%cmp10 = icmp eq i32 %and, 0
``````````
</details>
https://github.com/llvm/llvm-project/pull/72978
More information about the llvm-commits
mailing list