[llvm] [RISCV] Improve constant materialization to end with 'not' if the cons… (PR #66950)

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 20 13:23:45 PDT 2023


https://github.com/topperc created https://github.com/llvm/llvm-project/pull/66950

…tant has leading ones.

We can invert the value and treat it as if it had leading zeroes.

>From 8f93c5c3195c20365ac91b908c8e1ac4d8e6279a Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 20 Sep 2023 13:18:24 -0700
Subject: [PATCH] [RISCV] Improve constant materialization to end with 'not' if
 the constant has leading ones.

We can invert the value and treat it as if it had leading zeroes.
---
 .../Target/RISCV/MCTargetDesc/RISCVMatInt.cpp |  98 +++++++-----
 llvm/test/CodeGen/RISCV/imm.ll                | 140 ++++++++----------
 2 files changed, 123 insertions(+), 115 deletions(-)

diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
index f659779e9772055..6c5d0ad5b092708 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
@@ -171,6 +171,57 @@ static unsigned extractRotateInfo(int64_t Val) {
   return 0;
 }
 
+static void generateInstSeqLeadingZeros(int64_t Val,
+                                        const FeatureBitset &ActiveFeatures,
+                                        RISCVMatInt::InstSeq &Res) {
+  assert(Val > 0 && "Expected postive val");
+
+  unsigned LeadingZeros = llvm::countl_zero((uint64_t)Val);
+  uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros;
+  // Fill in the bits that will be shifted out with 1s. An example where this
+  // helps is trailing one masks with 32 or more ones. This will generate
+  // ADDI -1 and an SRLI.
+  ShiftedVal |= maskTrailingOnes<uint64_t>(LeadingZeros);
+
+  RISCVMatInt::InstSeq TmpSeq;
+  generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
+
+  // Keep the new sequence if it is an improvement or the original is empty.
+  if ((TmpSeq.size() + 1) < Res.size() ||
+      (Res.empty() && TmpSeq.size() < 8)) {
+    TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
+    Res = TmpSeq;
+  }
+
+  // Some cases can benefit from filling the lower bits with zeros instead.
+  ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros);
+  TmpSeq.clear();
+  generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
+
+  // Keep the new sequence if it is an improvement or the original is empty.
+  if ((TmpSeq.size() + 1) < Res.size() ||
+      (Res.empty() && TmpSeq.size() < 8)) {
+    TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
+    Res = TmpSeq;
+  }
+
+  // If we have exactly 32 leading zeros and Zba, we can try using zext.w at
+  // the end of the sequence.
+  if (LeadingZeros == 32 && ActiveFeatures[RISCV::FeatureStdExtZba]) {
+    // Try replacing upper bits with 1.
+    uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
+    TmpSeq.clear();
+    generateInstSeqImpl(LeadingOnesVal, ActiveFeatures, TmpSeq);
+
+    // Keep the new sequence if it is an improvement.
+    if ((TmpSeq.size() + 1) < Res.size() ||
+        (Res.empty() && TmpSeq.size() < 8)) {
+      TmpSeq.emplace_back(RISCV::ADD_UW, 0);
+      Res = TmpSeq;
+    }
+  }
+}
+
 namespace llvm::RISCVMatInt {
 InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
   RISCVMatInt::InstSeq Res;
@@ -210,47 +261,21 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
   // with no leading zeros and use a final SRLI to restore them.
   if (Val > 0) {
     assert(Res.size() > 2 && "Expected longer sequence");
-    unsigned LeadingZeros = llvm::countl_zero((uint64_t)Val);
-    uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros;
-    // Fill in the bits that will be shifted out with 1s. An example where this
-    // helps is trailing one masks with 32 or more ones. This will generate
-    // ADDI -1 and an SRLI.
-    ShiftedVal |= maskTrailingOnes<uint64_t>(LeadingZeros);
+    generateInstSeqLeadingZeros(Val, ActiveFeatures, Res);
+  }
 
+  // If the constant is negative, try inverting it and using our leading zero
+  // optimizations. Use an xori to invert the final value.
+  if (Val < 0 && Res.size() > 3) {
+    uint64_t InvertedVal = ~(uint64_t)Val;
     RISCVMatInt::InstSeq TmpSeq;
-    generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
+    generateInstSeqLeadingZeros(InvertedVal, ActiveFeatures, TmpSeq);
 
-    // Keep the new sequence if it is an improvement.
-    if ((TmpSeq.size() + 1) < Res.size()) {
-      TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
-      Res = TmpSeq;
-    }
-
-    // Some cases can benefit from filling the lower bits with zeros instead.
-    ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros);
-    TmpSeq.clear();
-    generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
-
-    // Keep the new sequence if it is an improvement.
-    if ((TmpSeq.size() + 1) < Res.size()) {
-      TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
+    // Keep it if we found a sequence that is smaller after inverting.
+    if (!TmpSeq.empty() && (TmpSeq.size() + 1) < Res.size()) {
+      TmpSeq.emplace_back(RISCV::XORI, -1);
       Res = TmpSeq;
     }
-
-    // If we have exactly 32 leading zeros and Zba, we can try using zext.w at
-    // the end of the sequence.
-    if (LeadingZeros == 32 && ActiveFeatures[RISCV::FeatureStdExtZba]) {
-      // Try replacing upper bits with 1.
-      uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
-      TmpSeq.clear();
-      generateInstSeqImpl(LeadingOnesVal, ActiveFeatures, TmpSeq);
-
-      // Keep the new sequence if it is an improvement.
-      if ((TmpSeq.size() + 1) < Res.size()) {
-        TmpSeq.emplace_back(RISCV::ADD_UW, 0);
-        Res = TmpSeq;
-      }
-    }
   }
 
   // If the Low and High halves are the same, use pack. The pack instruction
@@ -429,6 +454,7 @@ OpndKind Inst::getOpndKind() const {
     return RISCVMatInt::RegReg;
   case RISCV::ADDI:
   case RISCV::ADDIW:
+  case RISCV::XORI:
   case RISCV::SLLI:
   case RISCV::SRLI:
   case RISCV::SLLI_UW:
diff --git a/llvm/test/CodeGen/RISCV/imm.ll b/llvm/test/CodeGen/RISCV/imm.ll
index 4f9cf1d947d5c35..307d19690bff0ec 100644
--- a/llvm/test/CodeGen/RISCV/imm.ll
+++ b/llvm/test/CodeGen/RISCV/imm.ll
@@ -1058,47 +1058,37 @@ define i64 @imm_end_xori_1() nounwind {
 ;
 ; RV64I-LABEL: imm_end_xori_1:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, -1
-; RV64I-NEXT:    slli a0, a0, 36
-; RV64I-NEXT:    addi a0, a0, 1
-; RV64I-NEXT:    slli a0, a0, 25
-; RV64I-NEXT:    addi a0, a0, -1
+; RV64I-NEXT:    lui a0, 983040
+; RV64I-NEXT:    srli a0, a0, 3
+; RV64I-NEXT:    not a0, a0
 ; RV64I-NEXT:    ret
 ;
 ; RV64IZBA-LABEL: imm_end_xori_1:
 ; RV64IZBA:       # %bb.0:
-; RV64IZBA-NEXT:    li a0, -1
-; RV64IZBA-NEXT:    slli a0, a0, 36
-; RV64IZBA-NEXT:    addi a0, a0, 1
-; RV64IZBA-NEXT:    slli a0, a0, 25
-; RV64IZBA-NEXT:    addi a0, a0, -1
+; RV64IZBA-NEXT:    lui a0, 983040
+; RV64IZBA-NEXT:    srli a0, a0, 3
+; RV64IZBA-NEXT:    not a0, a0
 ; RV64IZBA-NEXT:    ret
 ;
 ; RV64IZBB-LABEL: imm_end_xori_1:
 ; RV64IZBB:       # %bb.0:
-; RV64IZBB-NEXT:    li a0, -1
-; RV64IZBB-NEXT:    slli a0, a0, 36
-; RV64IZBB-NEXT:    addi a0, a0, 1
-; RV64IZBB-NEXT:    slli a0, a0, 25
-; RV64IZBB-NEXT:    addi a0, a0, -1
+; RV64IZBB-NEXT:    lui a0, 983040
+; RV64IZBB-NEXT:    srli a0, a0, 3
+; RV64IZBB-NEXT:    not a0, a0
 ; RV64IZBB-NEXT:    ret
 ;
 ; RV64IZBS-LABEL: imm_end_xori_1:
 ; RV64IZBS:       # %bb.0:
-; RV64IZBS-NEXT:    li a0, -1
-; RV64IZBS-NEXT:    slli a0, a0, 36
-; RV64IZBS-NEXT:    addi a0, a0, 1
-; RV64IZBS-NEXT:    slli a0, a0, 25
-; RV64IZBS-NEXT:    addi a0, a0, -1
+; RV64IZBS-NEXT:    lui a0, 983040
+; RV64IZBS-NEXT:    srli a0, a0, 3
+; RV64IZBS-NEXT:    not a0, a0
 ; RV64IZBS-NEXT:    ret
 ;
 ; RV64IXTHEADBB-LABEL: imm_end_xori_1:
 ; RV64IXTHEADBB:       # %bb.0:
-; RV64IXTHEADBB-NEXT:    li a0, -1
-; RV64IXTHEADBB-NEXT:    slli a0, a0, 36
-; RV64IXTHEADBB-NEXT:    addi a0, a0, 1
-; RV64IXTHEADBB-NEXT:    slli a0, a0, 25
-; RV64IXTHEADBB-NEXT:    addi a0, a0, -1
+; RV64IXTHEADBB-NEXT:    lui a0, 983040
+; RV64IXTHEADBB-NEXT:    srli a0, a0, 3
+; RV64IXTHEADBB-NEXT:    not a0, a0
 ; RV64IXTHEADBB-NEXT:    ret
   ret i64 -2305843009180139521 ; 0xE000_0000_01FF_FFFF
 }
@@ -1174,13 +1164,12 @@ define i64 @imm_2reg_1() nounwind {
 ;
 ; RV64-NOPOOL-LABEL: imm_2reg_1:
 ; RV64-NOPOOL:       # %bb.0:
-; RV64-NOPOOL-NEXT:    li a0, -1
-; RV64-NOPOOL-NEXT:    slli a0, a0, 35
-; RV64-NOPOOL-NEXT:    addi a0, a0, 9
+; RV64-NOPOOL-NEXT:    lui a0, 1048430
+; RV64-NOPOOL-NEXT:    addiw a0, a0, 1493
 ; RV64-NOPOOL-NEXT:    slli a0, a0, 13
-; RV64-NOPOOL-NEXT:    addi a0, a0, 837
-; RV64-NOPOOL-NEXT:    slli a0, a0, 12
-; RV64-NOPOOL-NEXT:    addi a0, a0, 1656
+; RV64-NOPOOL-NEXT:    addi a0, a0, -1921
+; RV64-NOPOOL-NEXT:    srli a0, a0, 4
+; RV64-NOPOOL-NEXT:    not a0, a0
 ; RV64-NOPOOL-NEXT:    ret
 ;
 ; RV64I-POOL-LABEL: imm_2reg_1:
@@ -1191,45 +1180,42 @@ define i64 @imm_2reg_1() nounwind {
 ;
 ; RV64IZBA-LABEL: imm_2reg_1:
 ; RV64IZBA:       # %bb.0:
-; RV64IZBA-NEXT:    li a0, -1
-; RV64IZBA-NEXT:    slli a0, a0, 35
-; RV64IZBA-NEXT:    addi a0, a0, 9
+; RV64IZBA-NEXT:    lui a0, 1048430
+; RV64IZBA-NEXT:    addiw a0, a0, 1493
 ; RV64IZBA-NEXT:    slli a0, a0, 13
-; RV64IZBA-NEXT:    addi a0, a0, 837
-; RV64IZBA-NEXT:    slli a0, a0, 12
-; RV64IZBA-NEXT:    addi a0, a0, 1656
+; RV64IZBA-NEXT:    addi a0, a0, -1921
+; RV64IZBA-NEXT:    srli a0, a0, 4
+; RV64IZBA-NEXT:    not a0, a0
 ; RV64IZBA-NEXT:    ret
 ;
 ; RV64IZBB-LABEL: imm_2reg_1:
 ; RV64IZBB:       # %bb.0:
-; RV64IZBB-NEXT:    li a0, -1
-; RV64IZBB-NEXT:    slli a0, a0, 35
-; RV64IZBB-NEXT:    addi a0, a0, 9
+; RV64IZBB-NEXT:    lui a0, 1048430
+; RV64IZBB-NEXT:    addiw a0, a0, 1493
 ; RV64IZBB-NEXT:    slli a0, a0, 13
-; RV64IZBB-NEXT:    addi a0, a0, 837
-; RV64IZBB-NEXT:    slli a0, a0, 12
-; RV64IZBB-NEXT:    addi a0, a0, 1656
+; RV64IZBB-NEXT:    addi a0, a0, -1921
+; RV64IZBB-NEXT:    srli a0, a0, 4
+; RV64IZBB-NEXT:    not a0, a0
 ; RV64IZBB-NEXT:    ret
 ;
 ; RV64IZBS-LABEL: imm_2reg_1:
 ; RV64IZBS:       # %bb.0:
-; RV64IZBS-NEXT:    lui a0, 74565
-; RV64IZBS-NEXT:    addiw a0, a0, 1656
-; RV64IZBS-NEXT:    bseti a0, a0, 60
-; RV64IZBS-NEXT:    bseti a0, a0, 61
-; RV64IZBS-NEXT:    bseti a0, a0, 62
-; RV64IZBS-NEXT:    bseti a0, a0, 63
+; RV64IZBS-NEXT:    lui a0, 1048430
+; RV64IZBS-NEXT:    addiw a0, a0, 1493
+; RV64IZBS-NEXT:    slli a0, a0, 13
+; RV64IZBS-NEXT:    addi a0, a0, -1921
+; RV64IZBS-NEXT:    srli a0, a0, 4
+; RV64IZBS-NEXT:    not a0, a0
 ; RV64IZBS-NEXT:    ret
 ;
 ; RV64IXTHEADBB-LABEL: imm_2reg_1:
 ; RV64IXTHEADBB:       # %bb.0:
-; RV64IXTHEADBB-NEXT:    li a0, -1
-; RV64IXTHEADBB-NEXT:    slli a0, a0, 35
-; RV64IXTHEADBB-NEXT:    addi a0, a0, 9
+; RV64IXTHEADBB-NEXT:    lui a0, 1048430
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 1493
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 13
-; RV64IXTHEADBB-NEXT:    addi a0, a0, 837
-; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
-; RV64IXTHEADBB-NEXT:    addi a0, a0, 1656
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1921
+; RV64IXTHEADBB-NEXT:    srli a0, a0, 4
+; RV64IXTHEADBB-NEXT:    not a0, a0
 ; RV64IXTHEADBB-NEXT:    ret
   ret i64 -1152921504301427080 ; 0xF000_0000_1234_5678
 }
@@ -1724,13 +1710,12 @@ define i64 @imm_neg_9223372034778874949() {
 ;
 ; RV64-NOPOOL-LABEL: imm_neg_9223372034778874949:
 ; RV64-NOPOOL:       # %bb.0:
-; RV64-NOPOOL-NEXT:    li a0, -1
-; RV64-NOPOOL-NEXT:    slli a0, a0, 37
-; RV64-NOPOOL-NEXT:    addi a0, a0, 31
+; RV64-NOPOOL-NEXT:    lui a0, 1048329
+; RV64-NOPOOL-NEXT:    addiw a0, a0, -1911
 ; RV64-NOPOOL-NEXT:    slli a0, a0, 12
-; RV64-NOPOOL-NEXT:    addi a0, a0, -273
-; RV64-NOPOOL-NEXT:    slli a0, a0, 14
-; RV64-NOPOOL-NEXT:    addi a0, a0, -1093
+; RV64-NOPOOL-NEXT:    addi a0, a0, -1911
+; RV64-NOPOOL-NEXT:    srli a0, a0, 1
+; RV64-NOPOOL-NEXT:    not a0, a0
 ; RV64-NOPOOL-NEXT:    ret
 ;
 ; RV64I-POOL-LABEL: imm_neg_9223372034778874949:
@@ -1741,24 +1726,22 @@ define i64 @imm_neg_9223372034778874949() {
 ;
 ; RV64IZBA-LABEL: imm_neg_9223372034778874949:
 ; RV64IZBA:       # %bb.0:
-; RV64IZBA-NEXT:    li a0, -1
-; RV64IZBA-NEXT:    slli a0, a0, 37
-; RV64IZBA-NEXT:    addi a0, a0, 31
+; RV64IZBA-NEXT:    lui a0, 1048329
+; RV64IZBA-NEXT:    addiw a0, a0, -1911
 ; RV64IZBA-NEXT:    slli a0, a0, 12
-; RV64IZBA-NEXT:    addi a0, a0, -273
-; RV64IZBA-NEXT:    slli a0, a0, 14
-; RV64IZBA-NEXT:    addi a0, a0, -1093
+; RV64IZBA-NEXT:    addi a0, a0, -1911
+; RV64IZBA-NEXT:    srli a0, a0, 1
+; RV64IZBA-NEXT:    not a0, a0
 ; RV64IZBA-NEXT:    ret
 ;
 ; RV64IZBB-LABEL: imm_neg_9223372034778874949:
 ; RV64IZBB:       # %bb.0:
-; RV64IZBB-NEXT:    li a0, -1
-; RV64IZBB-NEXT:    slli a0, a0, 37
-; RV64IZBB-NEXT:    addi a0, a0, 31
+; RV64IZBB-NEXT:    lui a0, 1048329
+; RV64IZBB-NEXT:    addiw a0, a0, -1911
 ; RV64IZBB-NEXT:    slli a0, a0, 12
-; RV64IZBB-NEXT:    addi a0, a0, -273
-; RV64IZBB-NEXT:    slli a0, a0, 14
-; RV64IZBB-NEXT:    addi a0, a0, -1093
+; RV64IZBB-NEXT:    addi a0, a0, -1911
+; RV64IZBB-NEXT:    srli a0, a0, 1
+; RV64IZBB-NEXT:    not a0, a0
 ; RV64IZBB-NEXT:    ret
 ;
 ; RV64IZBS-LABEL: imm_neg_9223372034778874949:
@@ -1770,13 +1753,12 @@ define i64 @imm_neg_9223372034778874949() {
 ;
 ; RV64IXTHEADBB-LABEL: imm_neg_9223372034778874949:
 ; RV64IXTHEADBB:       # %bb.0:
-; RV64IXTHEADBB-NEXT:    li a0, -1
-; RV64IXTHEADBB-NEXT:    slli a0, a0, 37
-; RV64IXTHEADBB-NEXT:    addi a0, a0, 31
+; RV64IXTHEADBB-NEXT:    lui a0, 1048329
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1911
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
-; RV64IXTHEADBB-NEXT:    addi a0, a0, -273
-; RV64IXTHEADBB-NEXT:    slli a0, a0, 14
-; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1911
+; RV64IXTHEADBB-NEXT:    srli a0, a0, 1
+; RV64IXTHEADBB-NEXT:    not a0, a0
 ; RV64IXTHEADBB-NEXT:    ret
   ret i64 -9223372034778874949 ; 0x800000007bbbbbbb
 }



More information about the llvm-commits mailing list