[llvm] Riscv fix shifts (PR #156335)

Mon Sep 1 07:59:22 PDT 2025

https://github.com/steven-studio created https://github.com/llvm/llvm-project/pull/156335

[RISCV] Fix shift instruction opcodes in getArithmeticInstrCost

Currently all three shift types (SHL/SRL/SRA) map to VSLL_VV, which is
incorrect for SRL and SRA. This patch maps each to its matching RVV instruction:
- ISD::SHL -> RISCV::VSLL_VV (shift left)
- ISD::SRL -> RISCV::VSRL_VV (logical right shift) 
- ISD::SRA -> RISCV::VSRA_VV (arithmetic right shift)

Also fixes ssub_sat intrinsic to use VSSUB_VV instead of VSSUBU_VV.

Test updates reflecting the corrected cost model are not part of the patch
below, which touches only RISCVGISel.td and RISCVTargetTransformInfo.cpp.

>From 2cd3a56d80c2a9550a5a21ab3c4f885416e88b82 Mon Sep 17 00:00:00 2001
From: steven-studio <stevenyu.supreme at gmail.com>
Date: Mon, 1 Sep 2025 22:13:54 +0800
Subject: [PATCH] [RISCV] Fix multiple instruction mapping errors in TTI

- Fix shift instruction opcodes: SHL/SRL/SRA now correctly map to
  VSLL_VV/VSRL_VV/VSRA_VV instead of all using VSLL_VV
- Fix ssub_sat intrinsic to use VSSUB_VV instead of VSSUBU_VV
- Reword the Mul immediate comment to state that RISC-V has no MULI
  instruction (comment-only; no functional change to the cost handling)
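- Use getVScaleForTuning().value_or(1) instead of dereferencing the
  optional directly in VScale calculations
- Add GlobalISel patterns for setult with a simm12 immediate (SLTIU) and
  for Zbb i16 sext/zext (SEXT_H, ZEXT_H_RV64, ZEXT_H_RV32)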
---
 llvm/lib/Target/RISCV/RISCVGISel.td              | 12 ++++++++++++
 .../Target/RISCV/RISCVTargetTransformInfo.cpp    | 16 ++++++++++------
 2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td
index 791efca09d40e..84cd525a29eff 100644
--- a/llvm/lib/Target/RISCV/RISCVGISel.td
+++ b/llvm/lib/Target/RISCV/RISCVGISel.td
@@ -16,6 +16,18 @@
 include "RISCV.td"
 include "RISCVCombine.td"
 
+// (setult reg, imm12) → SLTIU
+def : Pat<(XLenVT (setult (XLenVT GPR:$rs1), simm12:$imm)),
+          (SLTIU GPR:$rs1, simm12:$imm)>;
+
+let Predicates = [HasStdExtZbb, IsRV64] in {
+  def : Pat<(i64 (sext (i16 GPR:$rs))), (SEXT_H GPR:$rs)>;
+  def : Pat<(i64 (zext (i16 GPR:$rs))), (ZEXT_H_RV64 GPR:$rs)>;
+}
+let Predicates = [HasStdExtZbb, IsRV32] in {
+  def : Pat<(i32 (zext (i16 GPR:$rs))), (ZEXT_H_RV32 GPR:$rs)>;
+}
+
 def simm12Plus1 : ImmLeaf<XLenVT, [{
     return (isInt<12>(Imm) && Imm != -2048) || Imm == 2048;}]>;
 def simm12Plus1i32 : ImmLeaf<i32, [{
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index c707fb110b10c..a777c2af0ecfc 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -85,14 +85,14 @@ RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
     case RISCV::VFREDUSUM_VS: {
       unsigned VL = VT.getVectorMinNumElements();
       if (!VT.isFixedLengthVector())
-        VL *= *getVScaleForTuning();
+        VL *= getVScaleForTuning().value_or(1);
       Cost += Log2_32_Ceil(VL);
       break;
     }
     case RISCV::VFREDOSUM_VS: {
       unsigned VL = VT.getVectorMinNumElements();
       if (!VT.isFixedLengthVector())
-        VL *= *getVScaleForTuning();
+        VL *= getVScaleForTuning().value_or(1);
       Cost += VL;
       break;
     }
@@ -242,7 +242,7 @@ InstructionCost RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
     // One more or less than a power of 2 can use SLLI+ADD/SUB.
     if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2())
       return TTI::TCC_Free;
-    // FIXME: There is no MULI instruction.
+    // No MULI in RISC-V, but 12-bit immediates can still be used in sequences.
     Takes12BitImm = true;
     break;
   case Instruction::Sub:
@@ -1342,7 +1342,7 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
         Op = RISCV::VSADD_VV;
         break;
       case Intrinsic::ssub_sat:
-        Op = RISCV::VSSUBU_VV;
+        Op = RISCV::VSSUB_VV;
         break;
       case Intrinsic::uadd_sat:
         Op = RISCV::VSADDU_VV;
@@ -1766,7 +1766,7 @@ unsigned RISCVTTIImpl::getEstimatedVLFor(VectorType *Ty) const {
   if (isa<ScalableVectorType>(Ty)) {
     const unsigned EltSize = DL.getTypeSizeInBits(Ty->getElementType());
     const unsigned MinSize = DL.getTypeSizeInBits(Ty).getKnownMinValue();
-    const unsigned VectorBits = *getVScaleForTuning() * RISCV::RVVBitsPerBlock;
+    const unsigned VectorBits = getVScaleForTuning().value_or(1) * RISCV::RVVBitsPerBlock;
     return RISCVTargetLowering::computeVLMAX(VectorBits, EltSize, MinSize);
   }
   return cast<FixedVectorType>(Ty)->getNumElements();
@@ -2510,9 +2510,13 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost(
     Op = RISCV::VADD_VV;
     break;
   case ISD::SHL:
+    Op = RISCV::VSLL_VV;
+    break;
   case ISD::SRL:
+    Op = RISCV::VSRL_VV;
+    break;
   case ISD::SRA:
-    Op = RISCV::VSLL_VV;
+    Op = RISCV::VSRA_VV;
     break;
   case ISD::AND:
   case ISD::OR: