[llvm] b41e0fb - [RISCV] Prevent overflowing the small size of InstSeq in generateInstSeq.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 12 15:29:19 PDT 2023


Author: Craig Topper
Date: 2023-06-12T15:29:03-07:00
New Revision: b41e0fb44ffaeebb508efb02d3359b175bf04c84

URL: https://github.com/llvm/llvm-project/commit/b41e0fb44ffaeebb508efb02d3359b175bf04c84
DIFF: https://github.com/llvm/llvm-project/commit/b41e0fb44ffaeebb508efb02d3359b175bf04c84.diff

LOG: [RISCV] Prevent overflowing the small size of InstSeq in generateInstSeq.

The small size is 8 which is the worst case of the core recursive
algorithm.

The special cases use the core algorithm and append additional
instructions. We were pushing the extra instructions before checking
the profitability. This could lead to 9 and maybe 10 instructions
in the sequence which overflows the small size.

This patch does the profitability check before inserting the
extra instructions so that we don't create 9 or 10 instruction
sequences.

Alternatively, we could bump the small size to 9 or 10, but then
we're pushing things that are never going to be used.

Reviewed By: asb

Differential Revision: https://reviews.llvm.org/D152308

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
index e0d38993021a7..f659779e97720 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
@@ -190,11 +190,12 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
               isInt<6>(ShiftedVal) && !ActiveFeatures[RISCV::TuneLUIADDIFusion];
     RISCVMatInt::InstSeq TmpSeq;
     generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
-    TmpSeq.emplace_back(RISCV::SLLI, TrailingZeros);
 
     // Keep the new sequence if it is an improvement.
-    if (TmpSeq.size() < Res.size() || IsShiftedCompressible)
+    if ((TmpSeq.size() + 1) < Res.size() || IsShiftedCompressible) {
+      TmpSeq.emplace_back(RISCV::SLLI, TrailingZeros);
       Res = TmpSeq;
+    }
   }
 
   // If we have a 1 or 2 instruction sequence this is the best we can do. This
@@ -218,21 +219,23 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
 
     RISCVMatInt::InstSeq TmpSeq;
     generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
-    TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
 
     // Keep the new sequence if it is an improvement.
-    if (TmpSeq.size() < Res.size())
+    if ((TmpSeq.size() + 1) < Res.size()) {
+      TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
       Res = TmpSeq;
+    }
 
     // Some cases can benefit from filling the lower bits with zeros instead.
     ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros);
     TmpSeq.clear();
     generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
-    TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
 
     // Keep the new sequence if it is an improvement.
-    if (TmpSeq.size() < Res.size())
+    if ((TmpSeq.size() + 1) < Res.size()) {
+      TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
       Res = TmpSeq;
+    }
 
     // If we have exactly 32 leading zeros and Zba, we can try using zext.w at
     // the end of the sequence.
@@ -241,11 +244,12 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
       uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
       TmpSeq.clear();
       generateInstSeqImpl(LeadingOnesVal, ActiveFeatures, TmpSeq);
-      TmpSeq.emplace_back(RISCV::ADD_UW, 0);
 
       // Keep the new sequence if it is an improvement.
-      if (TmpSeq.size() < Res.size())
+      if ((TmpSeq.size() + 1) < Res.size()) {
+        TmpSeq.emplace_back(RISCV::ADD_UW, 0);
         Res = TmpSeq;
+      }
     }
   }
 
@@ -258,9 +262,10 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
     if (LoVal == HiVal) {
       RISCVMatInt::InstSeq TmpSeq;
       generateInstSeqImpl(LoVal, ActiveFeatures, TmpSeq);
-      TmpSeq.emplace_back(RISCV::PACK, 0);
-      if (TmpSeq.size() < Res.size())
+      if ((TmpSeq.size() + 1) < Res.size()) {
+        TmpSeq.emplace_back(RISCV::PACK, 0);
         Res = TmpSeq;
+      }
     }
   }
 
@@ -284,9 +289,10 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
     if (isInt<32>(NewVal)) {
       RISCVMatInt::InstSeq TmpSeq;
       generateInstSeqImpl(NewVal, ActiveFeatures, TmpSeq);
-      TmpSeq.emplace_back(Opc, 31);
-      if (TmpSeq.size() < Res.size())
+      if ((TmpSeq.size() + 1) < Res.size()) {
+        TmpSeq.emplace_back(Opc, 31);
         Res = TmpSeq;
+      }
     }
 
     // Try to use BCLRI for upper 32 bits if the original lower 32 bits are
@@ -335,9 +341,10 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
     // Build the new instruction sequence.
     if (Div > 0) {
       generateInstSeqImpl(Val / Div, ActiveFeatures, TmpSeq);
-      TmpSeq.emplace_back(Opc, 0);
-      if (TmpSeq.size() < Res.size())
+      if ((TmpSeq.size() + 1) < Res.size()) {
+        TmpSeq.emplace_back(Opc, 0);
         Res = TmpSeq;
+      }
     } else {
       // Try to use LUI+SH*ADD+ADDI.
       int64_t Hi52 = ((uint64_t)Val + 0x800ull) & ~0xfffull;
@@ -361,10 +368,11 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
                "unexpected instruction sequence for immediate materialisation");
         assert(TmpSeq.empty() && "Expected empty TmpSeq");
         generateInstSeqImpl(Hi52 / Div, ActiveFeatures, TmpSeq);
-        TmpSeq.emplace_back(Opc, 0);
-        TmpSeq.emplace_back(RISCV::ADDI, Lo12);
-        if (TmpSeq.size() < Res.size())
+        if ((TmpSeq.size() + 2) < Res.size()) {
+          TmpSeq.emplace_back(Opc, 0);
+          TmpSeq.emplace_back(RISCV::ADDI, Lo12);
           Res = TmpSeq;
+        }
       }
     }
   }


        


More information about the llvm-commits mailing list