[llvm] cbd4596 - Recommmit "[RISCV] Improve contant materialization to end with 'not' if the cons… (#66950)"
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 20 19:01:18 PDT 2023
Author: Craig Topper
Date: 2023-09-20T18:51:51-07:00
New Revision: cbd45961684766dd3b1aaa7dcdd35ea3aafe360a
URL: https://github.com/llvm/llvm-project/commit/cbd45961684766dd3b1aaa7dcdd35ea3aafe360a
DIFF: https://github.com/llvm/llvm-project/commit/cbd45961684766dd3b1aaa7dcdd35ea3aafe360a.diff
LOG: Recommmit "[RISCV] Improve contant materialization to end with 'not' if the cons… (#66950)"
With MC test updates.
Original commit message:
We can invert the value and treat it as if it had leading zeroes.
Added:
Modified:
llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
llvm/test/CodeGen/RISCV/imm.ll
llvm/test/MC/RISCV/rv64i-aliases-valid.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
index f659779e9772055..6c5d0ad5b092708 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
@@ -171,6 +171,57 @@ static unsigned extractRotateInfo(int64_t Val) {
return 0;
}
+static void generateInstSeqLeadingZeros(int64_t Val,
+ const FeatureBitset &ActiveFeatures,
+ RISCVMatInt::InstSeq &Res) {
+ assert(Val > 0 && "Expected postive val");
+
+ unsigned LeadingZeros = llvm::countl_zero((uint64_t)Val);
+ uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros;
+ // Fill in the bits that will be shifted out with 1s. An example where this
+ // helps is trailing one masks with 32 or more ones. This will generate
+ // ADDI -1 and an SRLI.
+ ShiftedVal |= maskTrailingOnes<uint64_t>(LeadingZeros);
+
+ RISCVMatInt::InstSeq TmpSeq;
+ generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
+
+ // Keep the new sequence if it is an improvement or the original is empty.
+ if ((TmpSeq.size() + 1) < Res.size() ||
+ (Res.empty() && TmpSeq.size() < 8)) {
+ TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
+ Res = TmpSeq;
+ }
+
+ // Some cases can benefit from filling the lower bits with zeros instead.
+ ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros);
+ TmpSeq.clear();
+ generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
+
+ // Keep the new sequence if it is an improvement or the original is empty.
+ if ((TmpSeq.size() + 1) < Res.size() ||
+ (Res.empty() && TmpSeq.size() < 8)) {
+ TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
+ Res = TmpSeq;
+ }
+
+ // If we have exactly 32 leading zeros and Zba, we can try using zext.w at
+ // the end of the sequence.
+ if (LeadingZeros == 32 && ActiveFeatures[RISCV::FeatureStdExtZba]) {
+ // Try replacing upper bits with 1.
+ uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
+ TmpSeq.clear();
+ generateInstSeqImpl(LeadingOnesVal, ActiveFeatures, TmpSeq);
+
+ // Keep the new sequence if it is an improvement.
+ if ((TmpSeq.size() + 1) < Res.size() ||
+ (Res.empty() && TmpSeq.size() < 8)) {
+ TmpSeq.emplace_back(RISCV::ADD_UW, 0);
+ Res = TmpSeq;
+ }
+ }
+}
+
namespace llvm::RISCVMatInt {
InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
RISCVMatInt::InstSeq Res;
@@ -210,47 +261,21 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
// with no leading zeros and use a final SRLI to restore them.
if (Val > 0) {
assert(Res.size() > 2 && "Expected longer sequence");
- unsigned LeadingZeros = llvm::countl_zero((uint64_t)Val);
- uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros;
- // Fill in the bits that will be shifted out with 1s. An example where this
- // helps is trailing one masks with 32 or more ones. This will generate
- // ADDI -1 and an SRLI.
- ShiftedVal |= maskTrailingOnes<uint64_t>(LeadingZeros);
+ generateInstSeqLeadingZeros(Val, ActiveFeatures, Res);
+ }
+ // If the constant is negative, trying inverting and using our trailing zero
+ // optimizations. Use an xori to invert the final value.
+ if (Val < 0 && Res.size() > 3) {
+ uint64_t InvertedVal = ~(uint64_t)Val;
RISCVMatInt::InstSeq TmpSeq;
- generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
+ generateInstSeqLeadingZeros(InvertedVal, ActiveFeatures, TmpSeq);
- // Keep the new sequence if it is an improvement.
- if ((TmpSeq.size() + 1) < Res.size()) {
- TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
- Res = TmpSeq;
- }
-
- // Some cases can benefit from filling the lower bits with zeros instead.
- ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros);
- TmpSeq.clear();
- generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
-
- // Keep the new sequence if it is an improvement.
- if ((TmpSeq.size() + 1) < Res.size()) {
- TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
+ // Keep it if we found a sequence that is smaller after inverting.
+ if (!TmpSeq.empty() && (TmpSeq.size() + 1) < Res.size()) {
+ TmpSeq.emplace_back(RISCV::XORI, -1);
Res = TmpSeq;
}
-
- // If we have exactly 32 leading zeros and Zba, we can try using zext.w at
- // the end of the sequence.
- if (LeadingZeros == 32 && ActiveFeatures[RISCV::FeatureStdExtZba]) {
- // Try replacing upper bits with 1.
- uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
- TmpSeq.clear();
- generateInstSeqImpl(LeadingOnesVal, ActiveFeatures, TmpSeq);
-
- // Keep the new sequence if it is an improvement.
- if ((TmpSeq.size() + 1) < Res.size()) {
- TmpSeq.emplace_back(RISCV::ADD_UW, 0);
- Res = TmpSeq;
- }
- }
}
// If the Low and High halves are the same, use pack. The pack instruction
@@ -429,6 +454,7 @@ OpndKind Inst::getOpndKind() const {
return RISCVMatInt::RegReg;
case RISCV::ADDI:
case RISCV::ADDIW:
+ case RISCV::XORI:
case RISCV::SLLI:
case RISCV::SRLI:
case RISCV::SLLI_UW:
diff --git a/llvm/test/CodeGen/RISCV/imm.ll b/llvm/test/CodeGen/RISCV/imm.ll
index 4f9cf1d947d5c35..307d19690bff0ec 100644
--- a/llvm/test/CodeGen/RISCV/imm.ll
+++ b/llvm/test/CodeGen/RISCV/imm.ll
@@ -1058,47 +1058,37 @@ define i64 @imm_end_xori_1() nounwind {
;
; RV64I-LABEL: imm_end_xori_1:
; RV64I: # %bb.0:
-; RV64I-NEXT: li a0, -1
-; RV64I-NEXT: slli a0, a0, 36
-; RV64I-NEXT: addi a0, a0, 1
-; RV64I-NEXT: slli a0, a0, 25
-; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: lui a0, 983040
+; RV64I-NEXT: srli a0, a0, 3
+; RV64I-NEXT: not a0, a0
; RV64I-NEXT: ret
;
; RV64IZBA-LABEL: imm_end_xori_1:
; RV64IZBA: # %bb.0:
-; RV64IZBA-NEXT: li a0, -1
-; RV64IZBA-NEXT: slli a0, a0, 36
-; RV64IZBA-NEXT: addi a0, a0, 1
-; RV64IZBA-NEXT: slli a0, a0, 25
-; RV64IZBA-NEXT: addi a0, a0, -1
+; RV64IZBA-NEXT: lui a0, 983040
+; RV64IZBA-NEXT: srli a0, a0, 3
+; RV64IZBA-NEXT: not a0, a0
; RV64IZBA-NEXT: ret
;
; RV64IZBB-LABEL: imm_end_xori_1:
; RV64IZBB: # %bb.0:
-; RV64IZBB-NEXT: li a0, -1
-; RV64IZBB-NEXT: slli a0, a0, 36
-; RV64IZBB-NEXT: addi a0, a0, 1
-; RV64IZBB-NEXT: slli a0, a0, 25
-; RV64IZBB-NEXT: addi a0, a0, -1
+; RV64IZBB-NEXT: lui a0, 983040
+; RV64IZBB-NEXT: srli a0, a0, 3
+; RV64IZBB-NEXT: not a0, a0
; RV64IZBB-NEXT: ret
;
; RV64IZBS-LABEL: imm_end_xori_1:
; RV64IZBS: # %bb.0:
-; RV64IZBS-NEXT: li a0, -1
-; RV64IZBS-NEXT: slli a0, a0, 36
-; RV64IZBS-NEXT: addi a0, a0, 1
-; RV64IZBS-NEXT: slli a0, a0, 25
-; RV64IZBS-NEXT: addi a0, a0, -1
+; RV64IZBS-NEXT: lui a0, 983040
+; RV64IZBS-NEXT: srli a0, a0, 3
+; RV64IZBS-NEXT: not a0, a0
; RV64IZBS-NEXT: ret
;
; RV64IXTHEADBB-LABEL: imm_end_xori_1:
; RV64IXTHEADBB: # %bb.0:
-; RV64IXTHEADBB-NEXT: li a0, -1
-; RV64IXTHEADBB-NEXT: slli a0, a0, 36
-; RV64IXTHEADBB-NEXT: addi a0, a0, 1
-; RV64IXTHEADBB-NEXT: slli a0, a0, 25
-; RV64IXTHEADBB-NEXT: addi a0, a0, -1
+; RV64IXTHEADBB-NEXT: lui a0, 983040
+; RV64IXTHEADBB-NEXT: srli a0, a0, 3
+; RV64IXTHEADBB-NEXT: not a0, a0
; RV64IXTHEADBB-NEXT: ret
ret i64 -2305843009180139521 ; 0xE000_0000_01FF_FFFF
}
@@ -1174,13 +1164,12 @@ define i64 @imm_2reg_1() nounwind {
;
; RV64-NOPOOL-LABEL: imm_2reg_1:
; RV64-NOPOOL: # %bb.0:
-; RV64-NOPOOL-NEXT: li a0, -1
-; RV64-NOPOOL-NEXT: slli a0, a0, 35
-; RV64-NOPOOL-NEXT: addi a0, a0, 9
+; RV64-NOPOOL-NEXT: lui a0, 1048430
+; RV64-NOPOOL-NEXT: addiw a0, a0, 1493
; RV64-NOPOOL-NEXT: slli a0, a0, 13
-; RV64-NOPOOL-NEXT: addi a0, a0, 837
-; RV64-NOPOOL-NEXT: slli a0, a0, 12
-; RV64-NOPOOL-NEXT: addi a0, a0, 1656
+; RV64-NOPOOL-NEXT: addi a0, a0, -1921
+; RV64-NOPOOL-NEXT: srli a0, a0, 4
+; RV64-NOPOOL-NEXT: not a0, a0
; RV64-NOPOOL-NEXT: ret
;
; RV64I-POOL-LABEL: imm_2reg_1:
@@ -1191,45 +1180,42 @@ define i64 @imm_2reg_1() nounwind {
;
; RV64IZBA-LABEL: imm_2reg_1:
; RV64IZBA: # %bb.0:
-; RV64IZBA-NEXT: li a0, -1
-; RV64IZBA-NEXT: slli a0, a0, 35
-; RV64IZBA-NEXT: addi a0, a0, 9
+; RV64IZBA-NEXT: lui a0, 1048430
+; RV64IZBA-NEXT: addiw a0, a0, 1493
; RV64IZBA-NEXT: slli a0, a0, 13
-; RV64IZBA-NEXT: addi a0, a0, 837
-; RV64IZBA-NEXT: slli a0, a0, 12
-; RV64IZBA-NEXT: addi a0, a0, 1656
+; RV64IZBA-NEXT: addi a0, a0, -1921
+; RV64IZBA-NEXT: srli a0, a0, 4
+; RV64IZBA-NEXT: not a0, a0
; RV64IZBA-NEXT: ret
;
; RV64IZBB-LABEL: imm_2reg_1:
; RV64IZBB: # %bb.0:
-; RV64IZBB-NEXT: li a0, -1
-; RV64IZBB-NEXT: slli a0, a0, 35
-; RV64IZBB-NEXT: addi a0, a0, 9
+; RV64IZBB-NEXT: lui a0, 1048430
+; RV64IZBB-NEXT: addiw a0, a0, 1493
; RV64IZBB-NEXT: slli a0, a0, 13
-; RV64IZBB-NEXT: addi a0, a0, 837
-; RV64IZBB-NEXT: slli a0, a0, 12
-; RV64IZBB-NEXT: addi a0, a0, 1656
+; RV64IZBB-NEXT: addi a0, a0, -1921
+; RV64IZBB-NEXT: srli a0, a0, 4
+; RV64IZBB-NEXT: not a0, a0
; RV64IZBB-NEXT: ret
;
; RV64IZBS-LABEL: imm_2reg_1:
; RV64IZBS: # %bb.0:
-; RV64IZBS-NEXT: lui a0, 74565
-; RV64IZBS-NEXT: addiw a0, a0, 1656
-; RV64IZBS-NEXT: bseti a0, a0, 60
-; RV64IZBS-NEXT: bseti a0, a0, 61
-; RV64IZBS-NEXT: bseti a0, a0, 62
-; RV64IZBS-NEXT: bseti a0, a0, 63
+; RV64IZBS-NEXT: lui a0, 1048430
+; RV64IZBS-NEXT: addiw a0, a0, 1493
+; RV64IZBS-NEXT: slli a0, a0, 13
+; RV64IZBS-NEXT: addi a0, a0, -1921
+; RV64IZBS-NEXT: srli a0, a0, 4
+; RV64IZBS-NEXT: not a0, a0
; RV64IZBS-NEXT: ret
;
; RV64IXTHEADBB-LABEL: imm_2reg_1:
; RV64IXTHEADBB: # %bb.0:
-; RV64IXTHEADBB-NEXT: li a0, -1
-; RV64IXTHEADBB-NEXT: slli a0, a0, 35
-; RV64IXTHEADBB-NEXT: addi a0, a0, 9
+; RV64IXTHEADBB-NEXT: lui a0, 1048430
+; RV64IXTHEADBB-NEXT: addiw a0, a0, 1493
; RV64IXTHEADBB-NEXT: slli a0, a0, 13
-; RV64IXTHEADBB-NEXT: addi a0, a0, 837
-; RV64IXTHEADBB-NEXT: slli a0, a0, 12
-; RV64IXTHEADBB-NEXT: addi a0, a0, 1656
+; RV64IXTHEADBB-NEXT: addi a0, a0, -1921
+; RV64IXTHEADBB-NEXT: srli a0, a0, 4
+; RV64IXTHEADBB-NEXT: not a0, a0
; RV64IXTHEADBB-NEXT: ret
ret i64 -1152921504301427080 ; 0xF000_0000_1234_5678
}
@@ -1724,13 +1710,12 @@ define i64 @imm_neg_9223372034778874949() {
;
; RV64-NOPOOL-LABEL: imm_neg_9223372034778874949:
; RV64-NOPOOL: # %bb.0:
-; RV64-NOPOOL-NEXT: li a0, -1
-; RV64-NOPOOL-NEXT: slli a0, a0, 37
-; RV64-NOPOOL-NEXT: addi a0, a0, 31
+; RV64-NOPOOL-NEXT: lui a0, 1048329
+; RV64-NOPOOL-NEXT: addiw a0, a0, -1911
; RV64-NOPOOL-NEXT: slli a0, a0, 12
-; RV64-NOPOOL-NEXT: addi a0, a0, -273
-; RV64-NOPOOL-NEXT: slli a0, a0, 14
-; RV64-NOPOOL-NEXT: addi a0, a0, -1093
+; RV64-NOPOOL-NEXT: addi a0, a0, -1911
+; RV64-NOPOOL-NEXT: srli a0, a0, 1
+; RV64-NOPOOL-NEXT: not a0, a0
; RV64-NOPOOL-NEXT: ret
;
; RV64I-POOL-LABEL: imm_neg_9223372034778874949:
@@ -1741,24 +1726,22 @@ define i64 @imm_neg_9223372034778874949() {
;
; RV64IZBA-LABEL: imm_neg_9223372034778874949:
; RV64IZBA: # %bb.0:
-; RV64IZBA-NEXT: li a0, -1
-; RV64IZBA-NEXT: slli a0, a0, 37
-; RV64IZBA-NEXT: addi a0, a0, 31
+; RV64IZBA-NEXT: lui a0, 1048329
+; RV64IZBA-NEXT: addiw a0, a0, -1911
; RV64IZBA-NEXT: slli a0, a0, 12
-; RV64IZBA-NEXT: addi a0, a0, -273
-; RV64IZBA-NEXT: slli a0, a0, 14
-; RV64IZBA-NEXT: addi a0, a0, -1093
+; RV64IZBA-NEXT: addi a0, a0, -1911
+; RV64IZBA-NEXT: srli a0, a0, 1
+; RV64IZBA-NEXT: not a0, a0
; RV64IZBA-NEXT: ret
;
; RV64IZBB-LABEL: imm_neg_9223372034778874949:
; RV64IZBB: # %bb.0:
-; RV64IZBB-NEXT: li a0, -1
-; RV64IZBB-NEXT: slli a0, a0, 37
-; RV64IZBB-NEXT: addi a0, a0, 31
+; RV64IZBB-NEXT: lui a0, 1048329
+; RV64IZBB-NEXT: addiw a0, a0, -1911
; RV64IZBB-NEXT: slli a0, a0, 12
-; RV64IZBB-NEXT: addi a0, a0, -273
-; RV64IZBB-NEXT: slli a0, a0, 14
-; RV64IZBB-NEXT: addi a0, a0, -1093
+; RV64IZBB-NEXT: addi a0, a0, -1911
+; RV64IZBB-NEXT: srli a0, a0, 1
+; RV64IZBB-NEXT: not a0, a0
; RV64IZBB-NEXT: ret
;
; RV64IZBS-LABEL: imm_neg_9223372034778874949:
@@ -1770,13 +1753,12 @@ define i64 @imm_neg_9223372034778874949() {
;
; RV64IXTHEADBB-LABEL: imm_neg_9223372034778874949:
; RV64IXTHEADBB: # %bb.0:
-; RV64IXTHEADBB-NEXT: li a0, -1
-; RV64IXTHEADBB-NEXT: slli a0, a0, 37
-; RV64IXTHEADBB-NEXT: addi a0, a0, 31
+; RV64IXTHEADBB-NEXT: lui a0, 1048329
+; RV64IXTHEADBB-NEXT: addiw a0, a0, -1911
; RV64IXTHEADBB-NEXT: slli a0, a0, 12
-; RV64IXTHEADBB-NEXT: addi a0, a0, -273
-; RV64IXTHEADBB-NEXT: slli a0, a0, 14
-; RV64IXTHEADBB-NEXT: addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT: addi a0, a0, -1911
+; RV64IXTHEADBB-NEXT: srli a0, a0, 1
+; RV64IXTHEADBB-NEXT: not a0, a0
; RV64IXTHEADBB-NEXT: ret
ret i64 -9223372034778874949 ; 0x800000007bbbbbbb
}
diff --git a/llvm/test/MC/RISCV/rv64i-aliases-valid.s b/llvm/test/MC/RISCV/rv64i-aliases-valid.s
index a79c9389207454b..67533e944e86a7a 100644
--- a/llvm/test/MC/RISCV/rv64i-aliases-valid.s
+++ b/llvm/test/MC/RISCV/rv64i-aliases-valid.s
@@ -153,16 +153,12 @@ li x8, 0xFFFFFFF001
# CHECK-EXPAND-NEXT: slli s1, s1, 20
# CHECK-EXPAND-NEXT: addi s1, s1, -3
li x9, 0x1000FFFFFFFD
-# CHECK-INST: addi a0, zero, -1
-# CHECK-INST-NEXT: slli a0, a0, 36
-# CHECK-INST-NEXT: addi a0, a0, 1
-# CHECK-INST-NEXT: slli a0, a0, 25
-# CHECK-INST-NEXT: addi a0, a0, -1
-# CHECK-ALIAS: li a0, -1
-# CHECK-ALIAS-NEXT: slli a0, a0, 36
-# CHECK-ALIAS-NEXT: addi a0, a0, 1
-# CHECK-ALIAS-NEXT: slli a0, a0, 25
-# CHECK-ALIAS-NEXT: addi a0, a0, -1
+# CHECK-INST: lui a0, 983040
+# CHECK-INST-NEXT: srli a0, a0, 3
+# CHECK-INST-NEXT: xori a0, a0, -1
+# CHECK-ALIAS: lui a0, 983040
+# CHECK-ALIAS-NEXT: srli a0, a0, 3
+# CHECK-ALIAS-NEXT: not a0, a0
li x10, 0xE000000001FFFFFF
# CHECK-INST: addi a1, zero, -2047
# CHECK-INST-NEXT: slli a1, a1, 27
@@ -393,16 +389,12 @@ lla x8, 0xFFFFFFF001
# CHECK-EXPAND-NEXT: addi s1, s1, -3
la x9, 0x1000FFFFFFFD
lla x9, 0x1000FFFFFFFD
-# CHECK-INST: addi a0, zero, -1
-# CHECK-INST-NEXT: slli a0, a0, 36
-# CHECK-INST-NEXT: addi a0, a0, 1
-# CHECK-INST-NEXT: slli a0, a0, 25
-# CHECK-INST-NEXT: addi a0, a0, -1
-# CHECK-ALIAS: li a0, -1
-# CHECK-ALIAS-NEXT: slli a0, a0, 36
-# CHECK-ALIAS-NEXT: addi a0, a0, 1
-# CHECK-ALIAS-NEXT: slli a0, a0, 25
-# CHECK-ALIAS-NEXT: addi a0, a0, -1
+# CHECK-INST: lui a0, 983040
+# CHECK-INST-NEXT: srli a0, a0, 3
+# CHECK-INST-NEXT: xori a0, a0, -1
+# CHECK-ALIAS: lui a0, 983040
+# CHECK-ALIAS-NEXT: srli a0, a0, 3
+# CHECK-ALIAS-NEXT: not a0, a0
la x10, 0xE000000001FFFFFF
lla x10, 0xE000000001FFFFFF
# CHECK-INST: addi a1, zero, -2047
More information about the llvm-commits
mailing list