[llvm] af57a71 - [RISCV] Don't call setHasMultipleConditionRegisters(), so icmp is sunk

Fri Nov 19 08:40:23 PST 2021

Author: Philipp Tomsich
Date: 2021-11-19T08:32:59-08:00
New Revision: af57a71d1871ec4a108ca1b4478114770b6588bd

URL: https://github.com/llvm/llvm-project/commit/af57a71d1871ec4a108ca1b4478114770b6588bd
DIFF: https://github.com/llvm/llvm-project/commit/af57a71d1871ec4a108ca1b4478114770b6588bd.diff

LOG: [RISCV] Don't call setHasMultipleConditionRegisters(), so icmp is sunk

On RISC-V, icmp is not sunk (as the snippet below shows), which
results in the following suboptimal branch pattern:
```
  core_list_find:
	lh	a2, 2(a1)
	seqz	a3, a0         <<
	bltz	a2, .LBB0_5
	bnez	a3, .LBB0_9    << should sink the seqz
        [...]
	j	.LBB0_9
  .LBB0_5:
	bnez	a3, .LBB0_9    << should sink the seqz
	lh	a1, 0(a1)
        [...]
```
due to an icmp not being sunk.

The blocks after `codegenprepare` look as follows:
```
  define dso_local %struct.list_head_s* @core_list_find(%struct.list_head_s* readonly %list, %struct.list_data_s* nocapture readonly %info) local_unnamed_addr #0 {
  entry:
    %idx = getelementptr inbounds %struct.list_data_s, %struct.list_data_s* %info, i64 0, i32 1
    %0 = load i16, i16* %idx, align 2, !tbaa !4
    %cmp = icmp sgt i16 %0, -1
    %tobool.not37 = icmp eq %struct.list_head_s* %list, null
    br i1 %cmp, label %while.cond.preheader, label %while.cond9.preheader

  while.cond9.preheader:                            ; preds = %entry
    br i1 %tobool.not37, label %return, label %land.rhs11.lr.ph
```
where `%tobool.not37` is the result of the icmp that is not sunk.
Note that it is computed in the basic block that ends in what becomes the
`bltz` instruction, whereas the `bnez` that consumes it is in a basic block
of its own.

Compare this to what happens on AArch64 (where the icmp is correctly sunk):
```
  define dso_local %struct.list_head_s* @core_list_find(%struct.list_head_s* readonly %list, %struct.list_data_s* nocapture readonly %info) local_unnamed_addr #0 {
  entry:
    %idx = getelementptr inbounds %struct.list_data_s, %struct.list_data_s* %info, i64 0, i32 1
    %0 = load i16, i16* %idx, align 2, !tbaa !6
    %cmp = icmp sgt i16 %0, -1
    br i1 %cmp, label %while.cond.preheader, label %while.cond9.preheader

  while.cond9.preheader:                            ; preds = %entry
    %1 = icmp eq %struct.list_head_s* %list, null
    br i1 %1, label %return, label %land.rhs11.lr.ph
```

This is caused by sinkCmpExpression() being skipped when the target
reports that multiple condition registers are supported.

Given that the check for multiple condition registers affects only
sinkCmpExpression() and shouldNormalizeToSelectSequence(), this change
adjusts the RISC-V target as follows:
 * we no longer signal multiple condition registers (thus changing
   the behaviour of sinkCmpExpression() back to sinking the icmp)
 * we override shouldNormalizeToSelectSequence() so that it always selects
   the preferred normalisation strategy for our backend

With both changes, the test results remain unchanged.  Note that without
the target-specific override of shouldNormalizeToSelectSequence(), worse
code (more branches) is generated for select-and.ll and select-or.ll.

The original test case changes as expected:
```
  core_list_find:
	lh	a2, 2(a1)
	bltz	a2, .LBB0_5
	beqz	a0, .LBB0_9    <<
        [...]
	j	.LBB0_9
.LBB0_5:
	beqz	a0, .LBB0_9    <<
	lh	a1, 0(a1)
        [...]
```

Differential Revision: https://reviews.llvm.org/D98932

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/lib/Target/RISCV/RISCVISelLowering.h
    llvm/test/CodeGen/RISCV/sink-icmp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 2257e2f371736..0f1a6e5f9154a 100644

--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -963,9 +963,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   // Jumps are expensive, compared to logic
   setJumpIsExpensive();
 
-  // We can use any register for comparisons
-  setHasMultipleConditionRegisters();
-
   setTargetDAGCombine(ISD::ADD);
   setTargetDAGCombine(ISD::SUB);
   setTargetDAGCombine(ISD::AND);

diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 3edaebb215337..8e3d716ae9192 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -620,6 +620,14 @@ class RISCVTargetLowering : public TargetLowering {
   /// NOTE: Once BUILD_VECTOR can be custom lowered for all legal vector types,
   /// this override can be removed.
   bool mergeStoresAfterLegalization(EVT VT) const override;
+
+  /// Disable normalizing
+  /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
+  /// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y))
+  /// RISCV doesn't have flags so it's better to perform the and/or in a GPR.
+  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override {
+    return false;
+  };
 };
 
 namespace RISCV {

diff  --git a/llvm/test/CodeGen/RISCV/sink-icmp.ll b/llvm/test/CodeGen/RISCV/sink-icmp.ll
index 409545c1c88b3..4597b623d25a9 100644
--- a/llvm/test/CodeGen/RISCV/sink-icmp.ll
+++ b/llvm/test/CodeGen/RISCV/sink-icmp.ll
@@ -10,8 +10,7 @@ define signext i16 @func(i16* %a, i16* %b) {
 ; RV32-NEXT:    lh a0, 0(a0)
 ; RV32-NEXT:    bltz a0, .LBB0_3
 ; RV32-NEXT:  # %bb.1: # %.LBB0_1
-; RV32-NEXT:    seqz a1, a1
-; RV32-NEXT:    bnez a1, .LBB0_3
+; RV32-NEXT:    beqz a1, .LBB0_3
 ; RV32-NEXT:  # %bb.2: # %.LBB0_2
 ; RV32-NEXT:    ret
 ; RV32-NEXT:  .LBB0_3: # %return
@@ -23,8 +22,7 @@ define signext i16 @func(i16* %a, i16* %b) {
 ; RV64-NEXT:    lh a0, 0(a0)
 ; RV64-NEXT:    bltz a0, .LBB0_3
 ; RV64-NEXT:  # %bb.1: # %.LBB0_1
-; RV64-NEXT:    seqz a1, a1
-; RV64-NEXT:    bnez a1, .LBB0_3
+; RV64-NEXT:    beqz a1, .LBB0_3
 ; RV64-NEXT:  # %bb.2: # %.LBB0_2
 ; RV64-NEXT:    ret
 ; RV64-NEXT:  .LBB0_3: # %return