[llvm] r363736 - [RISCV] Prevent re-ordering some adds after shifts

Sam Elliott via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 18 13:38:09 PDT 2019


Author: lenary
Date: Tue Jun 18 13:38:08 2019
New Revision: 363736

URL: http://llvm.org/viewvc/llvm-project?rev=363736&view=rev
Log:
[RISCV] Prevent re-ordering some adds after shifts

Summary:
DAGCombine will normally turn a `(shl (add x, c1), c2)` into `(add (shl x, c2), c1 << c2)`, where `c1` and `c2` are constants. This can be prevented by a callback in TargetLowering.

On RISC-V, materialising the constant `c1 << c2` can be more expensive than materialising `c1`, because materialising the former may take more instructions and may require a scratch register, whereas materialising the latter may not.
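As a concrete illustration (the sequences below are hand-written for this summary, but they match the `add_small_const` test added by this patch): with `c1 = 1` and `c2 = 24` on RV32, keeping the add before the shift needs no constant materialisation at all, whereas the reassociated form must build `1 << 24 = 0x1000000` in a scratch register:

    # add kept before the shift: c1 = 1 fits the 12-bit addi immediate
    addi  a0, a0, 1          # a0 = x + 1
    slli  a0, a0, 24         # a0 = (x + 1) << 24

    # add re-ordered after the shift: 1 << 24 must be materialised first
    slli  a0, a0, 24         # a0 = x << 24
    lui   a1, 4096           # a1 = 4096 << 12 = 0x1000000 (scratch register)
    add   a0, a0, a1         # a0 = (x << 24) + (1 << 24)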

This patch implements the hook in RISCVTargetLowering to prevent this transform in the cases where either:
- `c1` fits into the immediate field of an `addi` instruction (while `c1 << c2` does not); or
- `c1` takes fewer instructions to materialise than `c1 << c2` (a sketch of this case follows the list).
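A sketch of the second case (constants hand-picked here, but consistent with the `add_huge_const` test added below): on RV64 with `c1 = 32767` and an effective shift of 48, the unshifted constant is one instruction cheaper:

    # materialising c1 = 32767: two instructions
    lui   a1, 8              # a1 = 8 << 12 = 32768
    addiw a1, a1, -1         # a1 = 32767

    # materialising c1 << 48 = 0x7fff000000000000: three instructions
    lui   a1, 8
    addiw a1, a1, -1
    slli  a1, a1, 48         # a1 = 32767 << 48

A scratch register is needed either way, but the unshifted form saves an instruction, so the hook rejects the transform.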

In future, DAGCombine could itself check whether `c1` fits into an add immediate, which might simplify hooks like this one for more targets than just RISC-V.

Reviewers: asb, luismarques, efriedma

Reviewed By: asb

Subscribers: xbolva00, lebedev.ri, craig.topper, lewis-revill, Jim, hiraditya, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, kito-cheng, shiva0217, jrtc27, zzheng, edward-jones, rogfer01, MartinMosbeck, brucehoult, the_o, rkruppe, PkmX, jocewei, psnobl, benna, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62857

Added:
    llvm/trunk/test/CodeGen/RISCV/add-before-shl.ll
Modified:
    llvm/trunk/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/trunk/lib/Target/RISCV/RISCVISelLowering.h
    llvm/trunk/lib/Target/RISCV/Utils/RISCVMatInt.cpp
    llvm/trunk/lib/Target/RISCV/Utils/RISCVMatInt.h

Modified: llvm/trunk/lib/Target/RISCV/RISCVISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/RISCV/RISCVISelLowering.cpp?rev=363736&r1=363735&r2=363736&view=diff
==============================================================================
--- llvm/trunk/lib/Target/RISCV/RISCVISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/RISCV/RISCVISelLowering.cpp Tue Jun 18 13:38:08 2019
@@ -17,6 +17,7 @@
 #include "RISCVRegisterInfo.h"
 #include "RISCVSubtarget.h"
 #include "RISCVTargetMachine.h"
+#include "Utils/RISCVMatInt.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/CallingConvLower.h"
@@ -854,6 +855,50 @@ SDValue RISCVTargetLowering::PerformDAGC
   return SDValue();
 }
 
+bool RISCVTargetLowering::isDesirableToCommuteWithShift(
+    const SDNode *N, CombineLevel Level) const {
+  // The following folds are only desirable if `(OP _, c1 << c2)` can be
+  // materialised in fewer instructions than `(OP _, c1)`:
+  //
+  //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
+  //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
+  SDValue N0 = N->getOperand(0);
+  MVT Ty = N0.getSimpleValueType();
+  if (Ty.isScalarInteger() &&
+      (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
+    auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+    auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
+    if (C1 && C2) {
+      APInt C1Int = C1->getAPIntValue();
+      APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
+
+      // We can materialise `c1 << c2` into an add immediate, so it's "free",
+      // and the combine should happen, to potentially allow further combines
+      // later.
+      if (isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
+        return true;
+
+      // We can materialise `c1` in an add immediate, so it's "free", and the
+      // combine should be prevented.
+      if (isLegalAddImmediate(C1Int.getSExtValue()))
+        return false;
+
+      // Neither constant will fit into an immediate, so find materialisation
+      // costs.
+      int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
+                                              Subtarget.is64Bit());
+      int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
+          ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit());
+
+      // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
+      // combine should be prevented.
+      if (C1Cost < ShiftedC1Cost)
+        return false;
+    }
+  }
+  return true;
+}
+
 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
     unsigned Depth) const {

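Note the early `return true` path above: when the shifted constant itself still fits an `addi` immediate, the combine stays enabled. A hand-written illustration (not taken from the patch):

    # c1 = 3, c2 = 2: both 3 and 3 << 2 = 12 fit a 12-bit addi immediate,
    # so the reassociated form is just as cheap and may enable later combines
    slli  a0, a0, 2          # a0 = x << 2
    addi  a0, a0, 12         # a0 = (x << 2) + (3 << 2)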
Modified: llvm/trunk/lib/Target/RISCV/RISCVISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/RISCV/RISCVISelLowering.h?rev=363736&r1=363735&r2=363736&view=diff
==============================================================================
--- llvm/trunk/lib/Target/RISCV/RISCVISelLowering.h (original)
+++ llvm/trunk/lib/Target/RISCV/RISCVISelLowering.h Tue Jun 18 13:38:08 2019
@@ -125,6 +125,8 @@ public:
       return false;
     return true;
   }
+  bool isDesirableToCommuteWithShift(const SDNode *N,
+                                     CombineLevel Level) const override;
 
 private:
   void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,

Modified: llvm/trunk/lib/Target/RISCV/Utils/RISCVMatInt.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/RISCV/Utils/RISCVMatInt.cpp?rev=363736&r1=363735&r2=363736&view=diff
==============================================================================
--- llvm/trunk/lib/Target/RISCV/Utils/RISCVMatInt.cpp (original)
+++ llvm/trunk/lib/Target/RISCV/Utils/RISCVMatInt.cpp Tue Jun 18 13:38:08 2019
@@ -16,7 +16,7 @@
 namespace llvm {
 
 namespace RISCVMatInt {
-void generateInstSeq(int64_t Val, bool Is64Bit, InstSeq &Res) {
+void generateInstSeq(int64_t Val, bool IsRV64, InstSeq &Res) {
   if (isInt<32>(Val)) {
     // Depending on the active bits in the immediate Value v, the following
     // instruction sequences are emitted:
@@ -32,13 +32,13 @@ void generateInstSeq(int64_t Val, bool I
       Res.push_back(Inst(RISCV::LUI, Hi20));
 
     if (Lo12 || Hi20 == 0) {
-      unsigned AddiOpc = (Is64Bit && Hi20) ? RISCV::ADDIW : RISCV::ADDI;
+      unsigned AddiOpc = (IsRV64 && Hi20) ? RISCV::ADDIW : RISCV::ADDI;
       Res.push_back(Inst(AddiOpc, Lo12));
     }
     return;
   }
 
-  assert(Is64Bit && "Can't emit >32-bit imm for non-RV64 target");
+  assert(IsRV64 && "Can't emit >32-bit imm for non-RV64 target");
 
   // In the worst case, for a full 64-bit constant, a sequence of 8 instructions
 // (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be emitted. Note
@@ -68,11 +68,26 @@ void generateInstSeq(int64_t Val, bool I
   int ShiftAmount = 12 + findFirstSet((uint64_t)Hi52);
   Hi52 = SignExtend64(Hi52 >> (ShiftAmount - 12), 64 - ShiftAmount);
 
-  generateInstSeq(Hi52, Is64Bit, Res);
+  generateInstSeq(Hi52, IsRV64, Res);
 
   Res.push_back(Inst(RISCV::SLLI, ShiftAmount));
   if (Lo12)
     Res.push_back(Inst(RISCV::ADDI, Lo12));
 }
+
+int getIntMatCost(const APInt &Val, unsigned Size, bool IsRV64) {
+  int PlatRegSize = IsRV64 ? 64 : 32;
+
+  // Split the constant into platform register sized chunks, and calculate cost
+  // of each chunk.
+  int Cost = 0;
+  for (unsigned ShiftVal = 0; ShiftVal < Size; ShiftVal += PlatRegSize) {
+    APInt Chunk = Val.ashr(ShiftVal).sextOrTrunc(PlatRegSize);
+    InstSeq MatSeq;
+    generateInstSeq(Chunk.getSExtValue(), IsRV64, MatSeq);
+    Cost += MatSeq.size();
+  }
+  return std::max(1, Cost);
+}
 } // namespace RISCVMatInt
 } // namespace llvm

Modified: llvm/trunk/lib/Target/RISCV/Utils/RISCVMatInt.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/RISCV/Utils/RISCVMatInt.h?rev=363736&r1=363735&r2=363736&view=diff
==============================================================================
--- llvm/trunk/lib/Target/RISCV/Utils/RISCVMatInt.h (original)
+++ llvm/trunk/lib/Target/RISCV/Utils/RISCVMatInt.h Tue Jun 18 13:38:08 2019
@@ -9,6 +9,7 @@
 #ifndef LLVM_LIB_TARGET_RISCV_MATINT_H
 #define LLVM_LIB_TARGET_RISCV_MATINT_H
 
+#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/MachineValueType.h"
 #include <cstdint>
@@ -30,6 +31,14 @@ using InstSeq = SmallVector<Inst, 8>;
 // order to allow this helper to be used from both the MC layer and during
 // instruction selection.
 void generateInstSeq(int64_t Val, bool IsRV64, InstSeq &Res);
+
+// Helper to estimate the number of instructions required to materialise the
+// given immediate value into a register. This estimate does not account for
+// `Val` possibly fitting into an immediate, and so may over-estimate.
+//
+// This will attempt to produce instructions to materialise `Val` as an
+// `Size`-bit immediate. `IsRV64` should match the target architecture.
+int getIntMatCost(const APInt &Val, unsigned Size, bool IsRV64);
 } // namespace RISCVMatInt
 } // namespace llvm
 #endif
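For a concrete sense of the costs this helper returns (sequences hand-written for this post, mirroring what `generateInstSeq` emits): in the `add_large_const` test below, on RV32 the shifted constant is actually the cheaper one, which is why the combine still fires there:

    # getIntMatCost(4095, 32, /*IsRV64=*/false) == 2
    lui   a1, 1              # a1 = 1 << 12 = 4096
    addi  a1, a1, -1         # a1 = 4095

    # getIntMatCost(4095 << 16, 32, /*IsRV64=*/false) == 1
    lui   a1, 65520          # a1 = 65520 << 12 = 0x0fff0000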

Added: llvm/trunk/test/CodeGen/RISCV/add-before-shl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/RISCV/add-before-shl.ll?rev=363736&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/RISCV/add-before-shl.ll (added)
+++ llvm/trunk/test/CodeGen/RISCV/add-before-shl.ll Tue Jun 18 13:38:08 2019
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV64I %s
+
+; These test that constant adds are not moved after shifts by DAGCombine,
+; if the constant is cheaper to materialise before it has been shifted.
+
+define signext i32 @add_small_const(i32 signext %a) nounwind {
+; RV32I-LABEL: add_small_const:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi a0, a0, 1
+; RV32I-NEXT:    slli a0, a0, 24
+; RV32I-NEXT:    srai a0, a0, 24
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: add_small_const:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi a0, a0, 1
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    srai a0, a0, 56
+; RV64I-NEXT:    ret
+  %1 = add i32 %a, 1
+  %2 = shl i32 %1, 24
+  %3 = ashr i32 %2, 24
+  ret i32 %3
+}
+
+define signext i32 @add_large_const(i32 signext %a) nounwind {
+; RV32I-LABEL: add_large_const:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a0, a0, 16
+; RV32I-NEXT:    lui a1, 65520
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    srai a0, a0, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: add_large_const:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 1
+; RV64I-NEXT:    addiw a1, a1, -1
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srai a0, a0, 48
+; RV64I-NEXT:    ret
+  %1 = add i32 %a, 4095
+  %2 = shl i32 %1, 16
+  %3 = ashr i32 %2, 16
+  ret i32 %3
+}
+
+define signext i32 @add_huge_const(i32 signext %a) nounwind {
+; RV32I-LABEL: add_huge_const:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a0, a0, 16
+; RV32I-NEXT:    lui a1, 524272
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    srai a0, a0, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: add_huge_const:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 8
+; RV64I-NEXT:    addiw a1, a1, -1
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srai a0, a0, 48
+; RV64I-NEXT:    ret
+  %1 = add i32 %a, 32767
+  %2 = shl i32 %1, 16
+  %3 = ashr i32 %2, 16
+  ret i32 %3
+}



