[llvm] [RISCV] Generalize the (ADD (SLLI X, 32), X) special case in constant… (PR #66931)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 20 10:31:10 PDT 2023
https://github.com/topperc created https://github.com/llvm/llvm-project/pull/66931
… materialization.
We don't have to limit ourselves to a shift amount of 32. We can support other shift amounts that make the upper 32 bits line up.
>From 28c270be8d0c4336f980ab58740c9f2f4d2ad2bf Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 20 Sep 2023 10:07:24 -0700
Subject: [PATCH] [RISCV] Generalize the (ADD (SLLI X, 32), X) special case in
constant materialization.
We don't have to limit ourselves to a shift amount of 32. We can
support other shift amounts that make the upper 32 bits line up.
---
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 45 +++--
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 38 ++--
llvm/test/CodeGen/RISCV/imm.ll | 199 ++++++++------------
3 files changed, 131 insertions(+), 151 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index e6d0346c45e8d54..0e3e211fbf73b85 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -195,28 +195,41 @@ static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
RISCVMatInt::InstSeq Seq =
RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
- // See if we can create this constant as (ADD (SLLI X, 32), X) where X is at
+ // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
// worst an LUI+ADDIW. This will require an extra register, but avoids a
// constant pool.
// If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
// low and high 32 bits are the same and bit 31 and 63 are set.
if (Seq.size() > 3) {
int64_t LoVal = SignExtend64<32>(Imm);
- int64_t HiVal = SignExtend64<32>(((uint64_t)Imm - (uint64_t)LoVal) >> 32);
- if (LoVal == HiVal ||
- (Subtarget.hasStdExtZba() && Lo_32(Imm) == Hi_32(Imm))) {
- RISCVMatInt::InstSeq SeqLo =
- RISCVMatInt::generateInstSeq(LoVal, Subtarget.getFeatureBits());
- if ((SeqLo.size() + 2) < Seq.size()) {
- SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
-
- SDValue SLLI = SDValue(
- CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
- CurDAG->getTargetConstant(32, DL, VT)),
- 0);
- // Prefer ADD when possible.
- unsigned AddOpc = (LoVal == HiVal) ? RISCV::ADD : RISCV::ADD_UW;
- return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
+ if (LoVal != 0) {
+ // Subtract the LoVal to emulate the effect of the final add.
+ uint64_t Tmp = (uint64_t)Imm - (uint64_t)LoVal;
+
+ // Use trailing zero counts to figure how far we need to shift LoVal to
+ // line up with the remaining constant.
+ unsigned TzLo = llvm::countr_zero((uint64_t)LoVal);
+ unsigned TzHi = llvm::countr_zero(Tmp);
+ assert(TzLo < 32 && TzHi >= 32);
+ unsigned ShiftAmt = TzHi - TzLo;
+
+ bool MatchedAdd = Tmp == ((uint64_t)LoVal << ShiftAmt);
+
+ if (MatchedAdd ||
+ (Subtarget.hasStdExtZba() && Lo_32(Imm) == Hi_32(Imm))) {
+ RISCVMatInt::InstSeq SeqLo =
+ RISCVMatInt::generateInstSeq(LoVal, Subtarget.getFeatureBits());
+ if ((SeqLo.size() + 2) < Seq.size()) {
+ SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
+
+ SDValue SLLI = SDValue(
+ CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
+ CurDAG->getTargetConstant(MatchedAdd ? ShiftAmt : 32, DL, VT)),
+ 0);
+ // Prefer ADD when possible.
+ unsigned AddOpc = MatchedAdd ? RISCV::ADD : RISCV::ADD_UW;
+ return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
+ }
}
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f1cea6c6756f4fc..2ff6c26ff53fcb9 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4954,24 +4954,38 @@ static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
return Op;
- // Special case. See if we can build the constant as (ADD (SLLI X, 32), X) do
+ // Optimizations below are disabled for opt size. If we're optimizing for size,
+ // use a constant pool.
+ if (DAG.shouldOptForSize())
+ return SDValue();
+
+ // Special case. See if we can build the constant as (ADD (SLLI X, C), X) do
// that if it will avoid a constant pool.
// It will require an extra temporary register though.
// If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
// low and high 32 bits are the same and bit 31 and 63 are set.
- if (!DAG.shouldOptForSize()) {
- int64_t LoVal = SignExtend64<32>(Imm);
- int64_t HiVal = SignExtend64<32>(((uint64_t)Imm - (uint64_t)LoVal) >> 32);
- if (LoVal == HiVal ||
- (Subtarget.hasStdExtZba() && Lo_32(Imm) == Hi_32(Imm))) {
- RISCVMatInt::InstSeq SeqLo =
- RISCVMatInt::generateInstSeq(LoVal, Subtarget.getFeatureBits());
- if ((SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
- return Op;
- }
+ int64_t LoVal = SignExtend64<32>(Imm);
+ if (LoVal == 0)
+ return SDValue();
+
+ // Subtract the LoVal to emulate the effect of the final add.
+ uint64_t Tmp = (uint64_t)Imm - (uint64_t)LoVal;
+
+ // Use trailing zero counts to figure how far we need to shift LoVal to line
+ // up with the remaining constant.
+ unsigned TzLo = llvm::countr_zero((uint64_t)LoVal);
+ unsigned TzHi = llvm::countr_zero(Tmp);
+ assert(TzLo < 32 && TzHi >= 32);
+ unsigned ShiftAmt = TzHi - TzLo;
+
+ if (Tmp == ((uint64_t)LoVal << ShiftAmt) ||
+ (Subtarget.hasStdExtZba() && Lo_32(Imm) == Hi_32(Imm))) {
+ RISCVMatInt::InstSeq SeqLo =
+ RISCVMatInt::generateInstSeq(LoVal, Subtarget.getFeatureBits());
+ if ((SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
+ return Op;
}
- // Expand to a constant pool using the default expansion code.
return SDValue();
}
diff --git a/llvm/test/CodeGen/RISCV/imm.ll b/llvm/test/CodeGen/RISCV/imm.ll
index 4f9cf1d947d5c35..205693847e64a40 100644
--- a/llvm/test/CodeGen/RISCV/imm.ll
+++ b/llvm/test/CodeGen/RISCV/imm.ll
@@ -1058,47 +1058,42 @@ define i64 @imm_end_xori_1() nounwind {
;
; RV64I-LABEL: imm_end_xori_1:
; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: slli a0, a0, 36
-; RV64I-NEXT: addi a0, a0, 1
-; RV64I-NEXT: slli a0, a0, 25
-; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: lui a0, 8192
+; RV64I-NEXT: addiw a0, a0, -1
+; RV64I-NEXT: slli a1, a0, 61
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: ret
;
; RV64IZBA-LABEL: imm_end_xori_1:
; RV64IZBA: # %bb.0:
-; RV64IZBA-NEXT: li a0, -1
-; RV64IZBA-NEXT: slli a0, a0, 36
-; RV64IZBA-NEXT: addi a0, a0, 1
-; RV64IZBA-NEXT: slli a0, a0, 25
-; RV64IZBA-NEXT: addi a0, a0, -1
+; RV64IZBA-NEXT: lui a0, 8192
+; RV64IZBA-NEXT: addiw a0, a0, -1
+; RV64IZBA-NEXT: slli a1, a0, 61
+; RV64IZBA-NEXT: add a0, a0, a1
; RV64IZBA-NEXT: ret
;
; RV64IZBB-LABEL: imm_end_xori_1:
; RV64IZBB: # %bb.0:
-; RV64IZBB-NEXT: li a0, -1
-; RV64IZBB-NEXT: slli a0, a0, 36
-; RV64IZBB-NEXT: addi a0, a0, 1
-; RV64IZBB-NEXT: slli a0, a0, 25
-; RV64IZBB-NEXT: addi a0, a0, -1
+; RV64IZBB-NEXT: lui a0, 8192
+; RV64IZBB-NEXT: addiw a0, a0, -1
+; RV64IZBB-NEXT: slli a1, a0, 61
+; RV64IZBB-NEXT: add a0, a0, a1
; RV64IZBB-NEXT: ret
;
; RV64IZBS-LABEL: imm_end_xori_1:
; RV64IZBS: # %bb.0:
-; RV64IZBS-NEXT: li a0, -1
-; RV64IZBS-NEXT: slli a0, a0, 36
-; RV64IZBS-NEXT: addi a0, a0, 1
-; RV64IZBS-NEXT: slli a0, a0, 25
-; RV64IZBS-NEXT: addi a0, a0, -1
+; RV64IZBS-NEXT: lui a0, 8192
+; RV64IZBS-NEXT: addiw a0, a0, -1
+; RV64IZBS-NEXT: slli a1, a0, 61
+; RV64IZBS-NEXT: add a0, a0, a1
; RV64IZBS-NEXT: ret
;
; RV64IXTHEADBB-LABEL: imm_end_xori_1:
; RV64IXTHEADBB: # %bb.0:
-; RV64IXTHEADBB-NEXT: li a0, -1
-; RV64IXTHEADBB-NEXT: slli a0, a0, 36
-; RV64IXTHEADBB-NEXT: addi a0, a0, 1
-; RV64IXTHEADBB-NEXT: slli a0, a0, 25
-; RV64IXTHEADBB-NEXT: addi a0, a0, -1
+; RV64IXTHEADBB-NEXT: lui a0, 8192
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -1
+; RV64IXTHEADBB-NEXT: slli a1, a0, 61
+; RV64IXTHEADBB-NEXT: add a0, a0, a1
; RV64IXTHEADBB-NEXT: ret
ret i64 -2305843009180139521 ; 0xE000_0000_01FF_FFFF
}
@@ -1172,64 +1167,44 @@ define i64 @imm_2reg_1() nounwind {
; RV32I-NEXT: lui a1, 983040
; RV32I-NEXT: ret
;
-; RV64-NOPOOL-LABEL: imm_2reg_1:
-; RV64-NOPOOL: # %bb.0:
-; RV64-NOPOOL-NEXT: li a0, -1
-; RV64-NOPOOL-NEXT: slli a0, a0, 35
-; RV64-NOPOOL-NEXT: addi a0, a0, 9
-; RV64-NOPOOL-NEXT: slli a0, a0, 13
-; RV64-NOPOOL-NEXT: addi a0, a0, 837
-; RV64-NOPOOL-NEXT: slli a0, a0, 12
-; RV64-NOPOOL-NEXT: addi a0, a0, 1656
-; RV64-NOPOOL-NEXT: ret
-;
-; RV64I-POOL-LABEL: imm_2reg_1:
-; RV64I-POOL: # %bb.0:
-; RV64I-POOL-NEXT: lui a0, %hi(.LCPI27_0)
-; RV64I-POOL-NEXT: ld a0, %lo(.LCPI27_0)(a0)
-; RV64I-POOL-NEXT: ret
+; RV64I-LABEL: imm_2reg_1:
+; RV64I: # %bb.0:
+; RV64I-NEXT: lui a0, 74565
+; RV64I-NEXT: addiw a0, a0, 1656
+; RV64I-NEXT: slli a1, a0, 57
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
;
; RV64IZBA-LABEL: imm_2reg_1:
; RV64IZBA: # %bb.0:
-; RV64IZBA-NEXT: li a0, -1
-; RV64IZBA-NEXT: slli a0, a0, 35
-; RV64IZBA-NEXT: addi a0, a0, 9
-; RV64IZBA-NEXT: slli a0, a0, 13
-; RV64IZBA-NEXT: addi a0, a0, 837
-; RV64IZBA-NEXT: slli a0, a0, 12
-; RV64IZBA-NEXT: addi a0, a0, 1656
+; RV64IZBA-NEXT: lui a0, 74565
+; RV64IZBA-NEXT: addiw a0, a0, 1656
+; RV64IZBA-NEXT: slli a1, a0, 57
+; RV64IZBA-NEXT: add a0, a0, a1
; RV64IZBA-NEXT: ret
;
; RV64IZBB-LABEL: imm_2reg_1:
; RV64IZBB: # %bb.0:
-; RV64IZBB-NEXT: li a0, -1
-; RV64IZBB-NEXT: slli a0, a0, 35
-; RV64IZBB-NEXT: addi a0, a0, 9
-; RV64IZBB-NEXT: slli a0, a0, 13
-; RV64IZBB-NEXT: addi a0, a0, 837
-; RV64IZBB-NEXT: slli a0, a0, 12
-; RV64IZBB-NEXT: addi a0, a0, 1656
+; RV64IZBB-NEXT: lui a0, 74565
+; RV64IZBB-NEXT: addiw a0, a0, 1656
+; RV64IZBB-NEXT: slli a1, a0, 57
+; RV64IZBB-NEXT: add a0, a0, a1
; RV64IZBB-NEXT: ret
;
; RV64IZBS-LABEL: imm_2reg_1:
; RV64IZBS: # %bb.0:
; RV64IZBS-NEXT: lui a0, 74565
; RV64IZBS-NEXT: addiw a0, a0, 1656
-; RV64IZBS-NEXT: bseti a0, a0, 60
-; RV64IZBS-NEXT: bseti a0, a0, 61
-; RV64IZBS-NEXT: bseti a0, a0, 62
-; RV64IZBS-NEXT: bseti a0, a0, 63
+; RV64IZBS-NEXT: slli a1, a0, 57
+; RV64IZBS-NEXT: add a0, a0, a1
; RV64IZBS-NEXT: ret
;
; RV64IXTHEADBB-LABEL: imm_2reg_1:
; RV64IXTHEADBB: # %bb.0:
-; RV64IXTHEADBB-NEXT: li a0, -1
-; RV64IXTHEADBB-NEXT: slli a0, a0, 35
-; RV64IXTHEADBB-NEXT: addi a0, a0, 9
-; RV64IXTHEADBB-NEXT: slli a0, a0, 13
-; RV64IXTHEADBB-NEXT: addi a0, a0, 837
-; RV64IXTHEADBB-NEXT: slli a0, a0, 12
-; RV64IXTHEADBB-NEXT: addi a0, a0, 1656
+; RV64IXTHEADBB-NEXT: lui a0, 74565
+; RV64IXTHEADBB-NEXT: addiw a0, a0, 1656
+; RV64IXTHEADBB-NEXT: slli a1, a0, 57
+; RV64IXTHEADBB-NEXT: add a0, a0, a1
; RV64IXTHEADBB-NEXT: ret
ret i64 -1152921504301427080 ; 0xF000_0000_1234_5678
}
@@ -1722,43 +1697,28 @@ define i64 @imm_neg_9223372034778874949() {
; RV32I-NEXT: lui a1, 524288
; RV32I-NEXT: ret
;
-; RV64-NOPOOL-LABEL: imm_neg_9223372034778874949:
-; RV64-NOPOOL: # %bb.0:
-; RV64-NOPOOL-NEXT: li a0, -1
-; RV64-NOPOOL-NEXT: slli a0, a0, 37
-; RV64-NOPOOL-NEXT: addi a0, a0, 31
-; RV64-NOPOOL-NEXT: slli a0, a0, 12
-; RV64-NOPOOL-NEXT: addi a0, a0, -273
-; RV64-NOPOOL-NEXT: slli a0, a0, 14
-; RV64-NOPOOL-NEXT: addi a0, a0, -1093
-; RV64-NOPOOL-NEXT: ret
-;
-; RV64I-POOL-LABEL: imm_neg_9223372034778874949:
-; RV64I-POOL: # %bb.0:
-; RV64I-POOL-NEXT: lui a0, %hi(.LCPI38_0)
-; RV64I-POOL-NEXT: ld a0, %lo(.LCPI38_0)(a0)
-; RV64I-POOL-NEXT: ret
+; RV64I-LABEL: imm_neg_9223372034778874949:
+; RV64I: # %bb.0:
+; RV64I-NEXT: lui a0, 506812
+; RV64I-NEXT: addiw a0, a0, -1093
+; RV64I-NEXT: slli a1, a0, 63
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
;
; RV64IZBA-LABEL: imm_neg_9223372034778874949:
; RV64IZBA: # %bb.0:
-; RV64IZBA-NEXT: li a0, -1
-; RV64IZBA-NEXT: slli a0, a0, 37
-; RV64IZBA-NEXT: addi a0, a0, 31
-; RV64IZBA-NEXT: slli a0, a0, 12
-; RV64IZBA-NEXT: addi a0, a0, -273
-; RV64IZBA-NEXT: slli a0, a0, 14
-; RV64IZBA-NEXT: addi a0, a0, -1093
+; RV64IZBA-NEXT: lui a0, 506812
+; RV64IZBA-NEXT: addiw a0, a0, -1093
+; RV64IZBA-NEXT: slli a1, a0, 63
+; RV64IZBA-NEXT: add a0, a0, a1
; RV64IZBA-NEXT: ret
;
; RV64IZBB-LABEL: imm_neg_9223372034778874949:
; RV64IZBB: # %bb.0:
-; RV64IZBB-NEXT: li a0, -1
-; RV64IZBB-NEXT: slli a0, a0, 37
-; RV64IZBB-NEXT: addi a0, a0, 31
-; RV64IZBB-NEXT: slli a0, a0, 12
-; RV64IZBB-NEXT: addi a0, a0, -273
-; RV64IZBB-NEXT: slli a0, a0, 14
-; RV64IZBB-NEXT: addi a0, a0, -1093
+; RV64IZBB-NEXT: lui a0, 506812
+; RV64IZBB-NEXT: addiw a0, a0, -1093
+; RV64IZBB-NEXT: slli a1, a0, 63
+; RV64IZBB-NEXT: add a0, a0, a1
; RV64IZBB-NEXT: ret
;
; RV64IZBS-LABEL: imm_neg_9223372034778874949:
@@ -1770,13 +1730,10 @@ define i64 @imm_neg_9223372034778874949() {
;
; RV64IXTHEADBB-LABEL: imm_neg_9223372034778874949:
; RV64IXTHEADBB: # %bb.0:
-; RV64IXTHEADBB-NEXT: li a0, -1
-; RV64IXTHEADBB-NEXT: slli a0, a0, 37
-; RV64IXTHEADBB-NEXT: addi a0, a0, 31
-; RV64IXTHEADBB-NEXT: slli a0, a0, 12
-; RV64IXTHEADBB-NEXT: addi a0, a0, -273
-; RV64IXTHEADBB-NEXT: slli a0, a0, 14
-; RV64IXTHEADBB-NEXT: addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT: lui a0, 506812
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -1093
+; RV64IXTHEADBB-NEXT: slli a1, a0, 63
+; RV64IXTHEADBB-NEXT: add a0, a0, a1
; RV64IXTHEADBB-NEXT: ret
ret i64 -9223372034778874949 ; 0x800000007bbbbbbb
}
@@ -1955,29 +1912,26 @@ define i64 @imm_9223372034904144827() {
;
; RV64I-LABEL: imm_9223372034904144827:
; RV64I: # %bb.0:
-; RV64I-NEXT: lui a0, 1048343
-; RV64I-NEXT: addiw a0, a0, 1911
-; RV64I-NEXT: slli a0, a0, 12
-; RV64I-NEXT: addi a0, a0, 1911
-; RV64I-NEXT: srli a0, a0, 1
+; RV64I-NEXT: lui a0, 572348
+; RV64I-NEXT: addiw a0, a0, -1093
+; RV64I-NEXT: slli a1, a0, 63
+; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: ret
;
; RV64IZBA-LABEL: imm_9223372034904144827:
; RV64IZBA: # %bb.0:
-; RV64IZBA-NEXT: lui a0, 1048343
-; RV64IZBA-NEXT: addiw a0, a0, 1911
-; RV64IZBA-NEXT: slli a0, a0, 12
-; RV64IZBA-NEXT: addi a0, a0, 1911
-; RV64IZBA-NEXT: srli a0, a0, 1
+; RV64IZBA-NEXT: lui a0, 572348
+; RV64IZBA-NEXT: addiw a0, a0, -1093
+; RV64IZBA-NEXT: slli a1, a0, 63
+; RV64IZBA-NEXT: add a0, a0, a1
; RV64IZBA-NEXT: ret
;
; RV64IZBB-LABEL: imm_9223372034904144827:
; RV64IZBB: # %bb.0:
-; RV64IZBB-NEXT: lui a0, 1048343
-; RV64IZBB-NEXT: addiw a0, a0, 1911
-; RV64IZBB-NEXT: slli a0, a0, 12
-; RV64IZBB-NEXT: addi a0, a0, 1911
-; RV64IZBB-NEXT: srli a0, a0, 1
+; RV64IZBB-NEXT: lui a0, 572348
+; RV64IZBB-NEXT: addiw a0, a0, -1093
+; RV64IZBB-NEXT: slli a1, a0, 63
+; RV64IZBB-NEXT: add a0, a0, a1
; RV64IZBB-NEXT: ret
;
; RV64IZBS-LABEL: imm_9223372034904144827:
@@ -1989,11 +1943,10 @@ define i64 @imm_9223372034904144827() {
;
; RV64IXTHEADBB-LABEL: imm_9223372034904144827:
; RV64IXTHEADBB: # %bb.0:
-; RV64IXTHEADBB-NEXT: lui a0, 1048343
-; RV64IXTHEADBB-NEXT: addiw a0, a0, 1911
-; RV64IXTHEADBB-NEXT: slli a0, a0, 12
-; RV64IXTHEADBB-NEXT: addi a0, a0, 1911
-; RV64IXTHEADBB-NEXT: srli a0, a0, 1
+; RV64IXTHEADBB-NEXT: lui a0, 572348
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -1093
+; RV64IXTHEADBB-NEXT: slli a1, a0, 63
+; RV64IXTHEADBB-NEXT: add a0, a0, a1
; RV64IXTHEADBB-NEXT: ret
ret i64 9223372034904144827 ; 0x7fffffff8bbbbbbb
}
More information about the llvm-commits
mailing list