[llvm] 0ebb02b - [RISCV] Override TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd.

Craig Topper via llvm-commits llvm-commits@lists.llvm.org
Wed May 11 11:13:39 PDT 2022


Author: Craig Topper
Date: 2022-05-11T11:13:17-07:00
New Revision: 0ebb02b90a47b43e039cb7040cc9fb7dd5ec5fce

URL: https://github.com/llvm/llvm-project/commit/0ebb02b90a47b43e039cb7040cc9fb7dd5ec5fce
DIFF: https://github.com/llvm/llvm-project/commit/0ebb02b90a47b43e039cb7040cc9fb7dd5ec5fce.diff

LOG: [RISCV] Override TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd.

This hook determines if SimplifySetcc transforms (X & (C l>>/<< Y))
==/!= 0 into ((X <</l>> Y) & C) ==/!= 0. Where C is a constant and
X might be a constant.

The default implementation favors doing the transform if X is not
a constant. Otherwise the code is left alone. There is a provision
that if the target supports a bit test instruction then the transform
will favor ((1 << Y) & X) ==/!= 0. RISCV does not say it has a variable
bit test operation.

RISCV with Zbs does have a BEXT instruction that performs (X >> Y) & 1.
Without Zbs, (X >> Y) & 1 still looks preferable to ((1 << Y) & X) since
we can use ANDI instead of putting a 1 in a register for SLL.

This patch overrides this hook to favor bit extract patterns and
otherwise falls back to the "do the transform if X is not a constant"
heuristic.

I've added tests where both C and X are constants with both the shl form
and lshr form. I've also added a test for a switch statement that lowers
to a bit test. That was my original motivation for looking at this.

Reviewed By: asb

Differential Revision: https://reviews.llvm.org/D124639

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/lib/Target/RISCV/RISCVISelLowering.h
    llvm/test/CodeGen/RISCV/bittest.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ac5c0af1d34e..3a6d388e5734 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1114,6 +1114,30 @@ bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
   return C && C->getAPIntValue().ule(10);
 }
 
+bool RISCVTargetLowering::
+    shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
+        SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
+        unsigned OldShiftOpcode, unsigned NewShiftOpcode,
+        SelectionDAG &DAG) const {
+  // One interesting pattern that we'd want to form is 'bit extract':
+  //   ((1 >> Y) & 1) ==/!= 0
+  // But we also need to be careful not to try to reverse that fold.
+
+  // Is this '((1 >> Y) & 1)'?
+  if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
+    return false; // Keep the 'bit extract' pattern.
+
+  // Will this be '((1 >> Y) & 1)' after the transform?
+  if (NewShiftOpcode == ISD::SRL && CC->isOne())
+    return true; // Do form the 'bit extract' pattern.
+
+  // If 'X' is a constant, and we transform, then we will immediately
+  // try to undo the fold, thus causing endless combine loop.
+  // So only do the transform if X is not a constant. This matches the default
+  // implementation of this function.
+  return !XC;
+}
+
 /// Check if sinking \p I's operands to I's basic block is profitable, because
 /// the operands can be folded into a target instruction, e.g.
 /// splats of scalars can fold into vector instructions.

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 5255d5c2792b..11a30c834fc2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -348,6 +348,10 @@ class RISCVTargetLowering : public TargetLowering {
   bool isCheapToSpeculateCtlz() const override;
   bool hasAndNotCompare(SDValue Y) const override;
   bool hasBitTest(SDValue X, SDValue Y) const override;
+  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
+      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
+      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
+      SelectionDAG &DAG) const override;
   bool shouldSinkOperands(Instruction *I,
                           SmallVectorImpl<Use *> &Ops) const override;
   bool isFPImmLegal(const APFloat &Imm, EVT VT,

diff --git a/llvm/test/CodeGen/RISCV/bittest.ll b/llvm/test/CodeGen/RISCV/bittest.ll
index b2a1ba0508ea..163a3076bb44 100644
--- a/llvm/test/CodeGen/RISCV/bittest.ll
+++ b/llvm/test/CodeGen/RISCV/bittest.ll
@@ -211,3 +211,228 @@ define i64 @bittest_63_i64(i64 %a) nounwind {
   %and = and i64 %not, 1
   ret i64 %and
 }
+
+; Make sure we use (andi (srl X, Y), 1) or bext.
+define i1 @bittest_constant_by_var_shr_i32(i32 signext %b) nounwind {
+; RV32I-LABEL: bittest_constant_by_var_shr_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a1, 301408
+; RV32I-NEXT:    addi a1, a1, 722
+; RV32I-NEXT:    srl a0, a1, a0
+; RV32I-NEXT:    andi a0, a0, 1
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: bittest_constant_by_var_shr_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 301408
+; RV64I-NEXT:    addiw a1, a1, 722
+; RV64I-NEXT:    srlw a0, a1, a0
+; RV64I-NEXT:    andi a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV32ZBS-LABEL: bittest_constant_by_var_shr_i32:
+; RV32ZBS:       # %bb.0:
+; RV32ZBS-NEXT:    lui a1, 301408
+; RV32ZBS-NEXT:    addi a1, a1, 722
+; RV32ZBS-NEXT:    bext a0, a1, a0
+; RV32ZBS-NEXT:    ret
+;
+; RV64ZBS-LABEL: bittest_constant_by_var_shr_i32:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    lui a1, 301408
+; RV64ZBS-NEXT:    addiw a1, a1, 722
+; RV64ZBS-NEXT:    bext a0, a1, a0
+; RV64ZBS-NEXT:    ret
+  %shl = lshr i32 1234567890, %b
+  %and = and i32 %shl, 1
+  %cmp = icmp ne i32 %and, 0
+  ret i1 %cmp
+}
+
+; Make sure we use (andi (srl X, Y), 1) or bext.
+define i1 @bittest_constant_by_var_shl_i32(i32 signext %b) nounwind {
+; RV32I-LABEL: bittest_constant_by_var_shl_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a1, 301408
+; RV32I-NEXT:    addi a1, a1, 722
+; RV32I-NEXT:    srl a0, a1, a0
+; RV32I-NEXT:    andi a0, a0, 1
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: bittest_constant_by_var_shl_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 301408
+; RV64I-NEXT:    addiw a1, a1, 722
+; RV64I-NEXT:    srlw a0, a1, a0
+; RV64I-NEXT:    andi a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV32ZBS-LABEL: bittest_constant_by_var_shl_i32:
+; RV32ZBS:       # %bb.0:
+; RV32ZBS-NEXT:    lui a1, 301408
+; RV32ZBS-NEXT:    addi a1, a1, 722
+; RV32ZBS-NEXT:    bext a0, a1, a0
+; RV32ZBS-NEXT:    ret
+;
+; RV64ZBS-LABEL: bittest_constant_by_var_shl_i32:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    lui a1, 301408
+; RV64ZBS-NEXT:    addiw a1, a1, 722
+; RV64ZBS-NEXT:    bext a0, a1, a0
+; RV64ZBS-NEXT:    ret
+  %shl = shl i32 1, %b
+  %and = and i32 %shl, 1234567890
+  %cmp = icmp ne i32 %and, 0
+  ret i1 %cmp
+}
+
+; Make sure we use (andi (srl X, Y), 1) or bext.
+define i1 @bittest_constant_by_var_shr_i64(i64 %b) nounwind {
+; RV32-LABEL: bittest_constant_by_var_shr_i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi a1, a0, -32
+; RV32-NEXT:    bltz a1, .LBB12_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    andi a0, zero, 1
+; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB12_2:
+; RV32-NEXT:    lui a1, 301408
+; RV32-NEXT:    addi a1, a1, 722
+; RV32-NEXT:    srl a0, a1, a0
+; RV32-NEXT:    andi a0, a0, 1
+; RV32-NEXT:    ret
+;
+; RV64I-LABEL: bittest_constant_by_var_shr_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 301408
+; RV64I-NEXT:    addiw a1, a1, 722
+; RV64I-NEXT:    srl a0, a1, a0
+; RV64I-NEXT:    andi a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bittest_constant_by_var_shr_i64:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    lui a1, 301408
+; RV64ZBS-NEXT:    addiw a1, a1, 722
+; RV64ZBS-NEXT:    bext a0, a1, a0
+; RV64ZBS-NEXT:    ret
+  %shl = lshr i64 1234567890, %b
+  %and = and i64 %shl, 1
+  %cmp = icmp ne i64 %and, 0
+  ret i1 %cmp
+}
+
+; Make sure we use (andi (srl X, Y), 1) or bext.
+define i1 @bittest_constant_by_var_shl_i64(i64 %b) nounwind {
+; RV32-LABEL: bittest_constant_by_var_shl_i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi a1, a0, -32
+; RV32-NEXT:    bltz a1, .LBB13_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    andi a0, zero, 1
+; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB13_2:
+; RV32-NEXT:    lui a1, 301408
+; RV32-NEXT:    addi a1, a1, 722
+; RV32-NEXT:    srl a0, a1, a0
+; RV32-NEXT:    andi a0, a0, 1
+; RV32-NEXT:    ret
+;
+; RV64I-LABEL: bittest_constant_by_var_shl_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 301408
+; RV64I-NEXT:    addiw a1, a1, 722
+; RV64I-NEXT:    srl a0, a1, a0
+; RV64I-NEXT:    andi a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64ZBS-LABEL: bittest_constant_by_var_shl_i64:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    lui a1, 301408
+; RV64ZBS-NEXT:    addiw a1, a1, 722
+; RV64ZBS-NEXT:    bext a0, a1, a0
+; RV64ZBS-NEXT:    ret
+  %shl = shl i64 1, %b
+  %and = and i64 %shl, 1234567890
+  %cmp = icmp ne i64 %and, 0
+  ret i1 %cmp
+}
+
+; We want to use (andi (srl X, Y), 1) or bext before the beqz.
+define void @bittest_switch(i32 signext %0) {
+; RV32I-LABEL: bittest_switch:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    li a1, 31
+; RV32I-NEXT:    bltu a1, a0, .LBB14_3
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    lui a1, 524291
+; RV32I-NEXT:    addi a1, a1, 768
+; RV32I-NEXT:    srl a0, a1, a0
+; RV32I-NEXT:    andi a0, a0, 1
+; RV32I-NEXT:    beqz a0, .LBB14_3
+; RV32I-NEXT:  # %bb.2:
+; RV32I-NEXT:    tail bar@plt
+; RV32I-NEXT:  .LBB14_3:
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: bittest_switch:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 31
+; RV64I-NEXT:    bltu a1, a0, .LBB14_3
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    lui a1, 2048
+; RV64I-NEXT:    addiw a1, a1, 51
+; RV64I-NEXT:    slli a1, a1, 8
+; RV64I-NEXT:    srl a0, a1, a0
+; RV64I-NEXT:    andi a0, a0, 1
+; RV64I-NEXT:    beqz a0, .LBB14_3
+; RV64I-NEXT:  # %bb.2:
+; RV64I-NEXT:    tail bar@plt
+; RV64I-NEXT:  .LBB14_3:
+; RV64I-NEXT:    ret
+;
+; RV32ZBS-LABEL: bittest_switch:
+; RV32ZBS:       # %bb.0:
+; RV32ZBS-NEXT:    li a1, 31
+; RV32ZBS-NEXT:    bltu a1, a0, .LBB14_3
+; RV32ZBS-NEXT:  # %bb.1:
+; RV32ZBS-NEXT:    lui a1, 524291
+; RV32ZBS-NEXT:    addi a1, a1, 768
+; RV32ZBS-NEXT:    bext a0, a1, a0
+; RV32ZBS-NEXT:    beqz a0, .LBB14_3
+; RV32ZBS-NEXT:  # %bb.2:
+; RV32ZBS-NEXT:    tail bar@plt
+; RV32ZBS-NEXT:  .LBB14_3:
+; RV32ZBS-NEXT:    ret
+;
+; RV64ZBS-LABEL: bittest_switch:
+; RV64ZBS:       # %bb.0:
+; RV64ZBS-NEXT:    li a1, 31
+; RV64ZBS-NEXT:    bltu a1, a0, .LBB14_3
+; RV64ZBS-NEXT:  # %bb.1:
+; RV64ZBS-NEXT:    lui a1, 2048
+; RV64ZBS-NEXT:    addiw a1, a1, 51
+; RV64ZBS-NEXT:    slli a1, a1, 8
+; RV64ZBS-NEXT:    bext a0, a1, a0
+; RV64ZBS-NEXT:    beqz a0, .LBB14_3
+; RV64ZBS-NEXT:  # %bb.2:
+; RV64ZBS-NEXT:    tail bar@plt
+; RV64ZBS-NEXT:  .LBB14_3:
+; RV64ZBS-NEXT:    ret
+  switch i32 %0, label %3 [
+    i32 8, label %2
+    i32 9, label %2
+    i32 12, label %2
+    i32 13, label %2
+    i32 31, label %2
+  ]
+
+2:
+  tail call void @bar()
+  br label %3
+
+3:
+  ret void
+}
+
+declare void @bar()


        


More information about the llvm-commits mailing list