[llvm] [Exegesis][RISCV] Add initial RVV support (PR #128767)

Min-Yih Hsu via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 26 14:04:06 PST 2025

https://github.com/mshockwave updated https://github.com/llvm/llvm-project/pull/128767

>From a0651b5b9b3af5beb0cce58e5d5c5d147521e918 Mon Sep 17 00:00:00 2001
From: Min Hsu <min.hsu at sifive.com>
Date: Mon, 24 Feb 2025 11:32:36 -0800
Subject: [PATCH 1/4] [Exegesis][RISCV] Add RVV support

 .../Target/RISCV/MCTargetDesc/RISCVBaseInfo.h |  37 +
 .../RISCV/rvv/eligible-inst.test              |  10 +
 .../llvm-exegesis/RISCV/rvv/explicit-sew.test |   7 +
 .../tools/llvm-exegesis/RISCV/rvv/filter.test |   6 +
 .../llvm-exegesis/RISCV/rvv/reduction.test    |   7 +
 .../RISCV/rvv/self-aliasing.test              |   6 +
 .../llvm-exegesis/RISCV/rvv/skip-rm.test      |  12 +
 .../RISCV/rvv/valid-sew-zvk.test              |  33 +
 .../llvm-exegesis/RISCV/rvv/valid-sew.test    |  41 ++
 .../llvm-exegesis/RISCV/rvv/vlmax-only.test   |   7 +
 .../RISCV/rvv/vtype-rm-setup.test             |  13 +
 .../llvm-exegesis/lib/MCInstrDescView.cpp     |   4 +
 .../tools/llvm-exegesis/lib/MCInstrDescView.h |   4 +
 .../llvm-exegesis/lib/RISCV/CMakeLists.txt    |   2 +
 .../lib/RISCV/RISCVExegesisPasses.h           |  19 +
 .../lib/RISCV/RISCVExegesisPostprocessing.cpp | 130 ++++
 .../lib/RISCV/RISCVExegesisPreprocessing.cpp  |  85 +++
 llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp | 687 +++++++++++++++++-
 .../lib/SerialSnippetGenerator.cpp            |   2 +
 llvm/tools/llvm-exegesis/lib/Target.cpp       |  13 +
 llvm/tools/llvm-exegesis/lib/Target.h         |   3 +
 llvm/tools/llvm-exegesis/llvm-exegesis.cpp    |  15 +-
 22 files changed, 1092 insertions(+), 51 deletions(-)
 create mode 100644 llvm/test/tools/llvm-exegesis/RISCV/rvv/eligible-inst.test
 create mode 100644 llvm/test/tools/llvm-exegesis/RISCV/rvv/explicit-sew.test
 create mode 100644 llvm/test/tools/llvm-exegesis/RISCV/rvv/filter.test
 create mode 100644 llvm/test/tools/llvm-exegesis/RISCV/rvv/reduction.test
 create mode 100644 llvm/test/tools/llvm-exegesis/RISCV/rvv/self-aliasing.test
 create mode 100644 llvm/test/tools/llvm-exegesis/RISCV/rvv/skip-rm.test
 create mode 100644 llvm/test/tools/llvm-exegesis/RISCV/rvv/valid-sew-zvk.test
 create mode 100644 llvm/test/tools/llvm-exegesis/RISCV/rvv/valid-sew.test
 create mode 100644 llvm/test/tools/llvm-exegesis/RISCV/rvv/vlmax-only.test
 create mode 100644 llvm/test/tools/llvm-exegesis/RISCV/rvv/vtype-rm-setup.test
 create mode 100644 llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPasses.h
 create mode 100644 llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp
 create mode 100644 llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPreprocessing.cpp

diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index 80ff18d914dca..135aec0c8135c 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -432,7 +432,44 @@ enum RoundingMode {
   RNE = 1,
   RDN = 2,
   ROD = 3,
+  Invalid
+inline static StringRef roundingModeToString(RoundingMode RndMode) { // Maps a VXRM mode to its lowercase mnemonic.
+  switch (RndMode) {
+  default: // Also reached for Invalid: only the four real modes may be passed.
+    llvm_unreachable("Unknown vector fixed-point rounding mode");
+  case RISCVVXRndMode::RNU:
+    return "rnu";
+  case RISCVVXRndMode::RNE:
+    return "rne";
+  case RISCVVXRndMode::RDN:
+    return "rdn";
+  case RISCVVXRndMode::ROD:
+    return "rod";
+  }
+inline static RoundingMode stringToRoundingMode(StringRef Str) { // Parses one of the four lowercase mnemonics.
+  return StringSwitch<RoundingMode>(Str)
+      .Case("rnu", RISCVVXRndMode::RNU)
+      .Case("rne", RISCVVXRndMode::RNE)
+      .Case("rdn", RISCVVXRndMode::RDN)
+      .Case("rod", RISCVVXRndMode::ROD)
+      .Default(RISCVVXRndMode::Invalid); // Any other text yields Invalid.
+inline static bool isValidRoundingMode(unsigned Mode) { // Checks a raw value against the four defined modes.
+  switch (Mode) {
+  default: // Invalid and every other value.
+    return false;
+  case RISCVVXRndMode::RNU:
+  case RISCVVXRndMode::RNE:
+  case RISCVVXRndMode::RDN:
+  case RISCVVXRndMode::ROD:
+    return true;
+  }
 } // namespace RISCVVXRndMode
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/eligible-inst.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/eligible-inst.test
new file mode 100644
index 0000000000000..189adf2c1b334
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/eligible-inst.test
@@ -0,0 +1,10 @@
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
+# RUN:    --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 | FileCheck %s --allow-empty --check-prefix=LATENCY
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
+# RUN:    --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 --min-instructions=100 | FileCheck %s --check-prefix=RTHROUGHPUT
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/explicit-sew.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/explicit-sew.test
new file mode 100644
index 0000000000000..476cf35818d6f
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/explicit-sew.test
@@ -0,0 +1,7 @@
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFWREDUSUM_VS_M1_E32 \
+# RUN:    --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s
+# Make sure none of the configs has a SEW other than e32
+# CHECK: SEW: e32
+# CHECK-NOT: SEW: e{{(8|16|64)}}
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/filter.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/filter.test
new file mode 100644
index 0000000000000..e3a4336fdf670
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/filter.test
@@ -0,0 +1,6 @@
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput --opcode-name=PseudoVNCLIPU_WX_M1_MASK \
+# RUN:    --riscv-filter-config='vtype = {VXRM: rod, AVL: VLMAX, SEW: e(8|16), Policy: ta/mu}' --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s
+# CHECK: config:          'vtype = {VXRM: rod, AVL: VLMAX, SEW: e8, Policy: ta/mu}'
+# CHECK: config:          'vtype = {VXRM: rod, AVL: VLMAX, SEW: e16, Policy: ta/mu}'
+# CHECK-NOT: config:          'vtype = {VXRM: rod, AVL: VLMAX, SEW: e(32|64), Policy: ta/mu}'
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/reduction.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/reduction.test
new file mode 100644
index 0000000000000..a637fa24af16b
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/reduction.test
@@ -0,0 +1,7 @@
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVWREDSUMU_VS_M8_E32 --min-instructions=100 | \
+# RUN:    FileCheck %s
+# Make sure reduction ops don't have aliasing between vd and vs1
+# CHECK:      instructions:
+# CHECK-NOT:  V[[REG:[0-9]+]] V[[REG]] V{{[0-9]+}}M8 V[[REG]]
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/self-aliasing.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/self-aliasing.test
new file mode 100644
index 0000000000000..c950341716238
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/self-aliasing.test
@@ -0,0 +1,6 @@
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVXOR_VX_M4 --min-instructions=100 | \
+# RUN:    FileCheck %s
+# Make sure all def / use operands are the same in latency mode.
+# CHECK:      instructions:
+# CHECK-NEXT: PseudoVXOR_VX_M4 V[[REG:[0-9]+]]M4 V[[REG]]M4 V[[REG]]M4 X{{.*}}
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/skip-rm.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/skip-rm.test
new file mode 100644
index 0000000000000..a3af37149eeb5
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/skip-rm.test
@@ -0,0 +1,12 @@
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVAADDU_VV_M1 \
+# RUN:    --riscv-enumerate-rounding-modes=false --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=VXRM
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFADD_VFPR16_M1_E16 \
+# RUN:    --riscv-enumerate-rounding-modes=false --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=FRM
+# VXRM: VXRM: rnu
+# VXRM-NOT: VXRM: {{(rne|rdn|rod)}}
+# FRM: PseudoVFADD_VFPR16_M1_E16
+# FRM: FRM: rne
+# FRM-NOT: FRM: {{(rtz|rdn|rup|rmm|dyn)}}
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/valid-sew-zvk.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/valid-sew-zvk.test
new file mode 100644
index 0000000000000..515d3397b57be
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/valid-sew-zvk.test
@@ -0,0 +1,33 @@
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
+# RUN:    --opcode-name=PseudoVAESDF_VS_M1_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
+# RUN:    FileCheck %s --check-prefix=ZVK
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
+# RUN:    --opcode-name=PseudoVGHSH_VV_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
+# RUN:    FileCheck %s --check-prefix=ZVK
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
+# RUN:    --opcode-name=PseudoVSM4K_VI_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
+# RUN:    FileCheck %s --check-prefix=ZVK
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
+# RUN:    --opcode-name=PseudoVSM3C_VI_M2 --max-configs-per-opcode=1000 --min-instructions=100 | \
+# RUN:    FileCheck %s --check-prefix=ZVK
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
+# RUN:    --opcode-name=PseudoVSHA2MS_VV_M1_E32 --max-configs-per-opcode=1000 --min-instructions=100 | \
+# RUN:    FileCheck %s --allow-empty --check-prefix=ZVKNH
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
+# RUN:    --opcode-name=PseudoVSHA2MS_VV_M2_E64 --max-configs-per-opcode=1000 --min-instructions=100 | \
+# RUN:    FileCheck %s --allow-empty --check-prefix=ZVKNH
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
+# RUN:    --opcode-name=PseudoVSM3C_VI_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
+# RUN:    FileCheck %s --allow-empty --check-prefix=EMPTY
+# Most vector crypto only supports SEW=32, except Zvknhb which also supports SEW=64
+# ZVK-NOT: SEW: e{{(8|16)}}
+# ZVK: SEW: e32
+# ZVK-NOT: SEW: e64
+# ZVKNH(A|B) can either have SEW=32 (EGW=128) or SEW=64 (EGW=256)
+# ZVKNH-NOT: SEW: e{{(8|16)}}
+# ZVKNH: SEW: e{{(32|64)}}
+# EMPTY-NOT: SEW: e{{(8|16|32|64)}}
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/valid-sew.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/valid-sew.test
new file mode 100644
index 0000000000000..b678300564529
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/valid-sew.test
@@ -0,0 +1,41 @@
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVMUL_VV_MF4_MASK \
+# RUN:    --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=FRAC-LMUL
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
+# RUN:    --opcode-name=PseudoVFADD_VFPR16_M1_E16,PseudoVFADD_VV_M2_E16,PseudoVFCLASS_V_MF2 --max-configs-per-opcode=1000 --min-instructions=100 | \
+# RUN:    FileCheck %s --check-prefix=FP
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
+# RUN:    --opcode-name=PseudoVSEXT_VF8_M2,PseudoVZEXT_VF8_M2 --max-configs-per-opcode=1000 --min-instructions=100 | \
+# RUN:    FileCheck %s --check-prefix=VEXT
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 -benchmark-phase=assemble-measured-code --mode=latency \
+# RUN:    --opcode-name=PseudoVFREDUSUM_VS_M1_E16 --max-configs-per-opcode=1000 --min-instructions=100 | \
+# RUN:    FileCheck %s --check-prefix=VFRED --allow-empty
+# Make sure only the supported SEWs are generated for fractional LMUL.
+# FRAC-LMUL: SEW: e16
+# FRAC-LMUL-NOT: SEW: e{{(32|64)}}
+# Make sure only SEWs that are equal to the supported FLEN are generated
+# FP: PseudoVFADD_VFPR16_M1_E16
+# FP-NOT: SEW: e8
+# FP: PseudoVFADD_VV_M2_E16
+# FP-NOT: SEW: e8
+# FP: PseudoVFCLASS_V_MF2
+# FP-NOT: SEW: e8
+# VS/ZEXT can only operate on SEW that will not lead to invalid EEW on the
+# source operand.
+# VEXT: PseudoVSEXT_VF8_M2
+# VEXT-NOT: SEW: e8
+# VEXT-NOT: SEW: e16
+# VEXT-NOT: SEW: e32
+# VEXT: SEW: e64
+# VEXT: PseudoVZEXT_VF8_M2
+# VEXT-NOT: SEW: e8
+# VEXT-NOT: SEW: e16
+# VEXT-NOT: SEW: e32
+# VEXT: SEW: e64
+# P470 doesn't have Zvfh so 16-bit vfredusum shouldn't exist
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/vlmax-only.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/vlmax-only.test
new file mode 100644
index 0000000000000..30897b6e13735
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/vlmax-only.test
@@ -0,0 +1,7 @@
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFWREDUSUM_VS_M1_E32 \
+# RUN:    --riscv-vlmax-for-vl --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s
+# Only allow VLMAX for AVL when -riscv-vlmax-for-vl is present
+# CHECK-NOT: AVL: {{(simm5|<MCOperand: .*>)}}
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/vtype-rm-setup.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/vtype-rm-setup.test
new file mode 100644
index 0000000000000..c41b357c13821
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/vtype-rm-setup.test
@@ -0,0 +1,13 @@
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFWREDUSUM_VS_M1_E32 \
+# RUN:    --max-configs-per-opcode=1 --min-instructions=100 --dump-object-to-disk=%t.o > %t.txt
+# RUN: llvm-objdump --triple=riscv64 -d %t.o | FileCheck %s --check-prefix=VFWREDUSUM
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVSSRL_VX_MF4 \
+# RUN:    --max-configs-per-opcode=1 --min-instructions=100 --dump-object-to-disk=%t.o > %t.txt
+# RUN: llvm-objdump --triple=riscv64 -d %t.o | FileCheck %s --check-prefix=VSSRL
+# Make sure the correct VSETVL / VXRM write / FRM write instructions are generated
+# VFWREDUSUM: vsetvli {{.*}}, zero, e32, m1, tu, ma
+# VFWREDUSUM: fsrmi   {{.*}}, 0x0
+# VSSRL: vsetvli {{.*}}, zero, e8, mf4, tu, ma
+# VSSRL: csrwi   vxrm, 0x0
diff --git a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
index c002f68b427f7..e0e796cee8040 100644
--- a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
+++ b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
@@ -50,6 +50,8 @@ bool Operand::isTied() const { return TiedToIndex.has_value(); }
 bool Operand::isVariable() const { return VariableIndex.has_value(); }
+bool Operand::isEarlyClobber() const { return IsEarlyClobber; } // Flag is filled in by Instruction::create.
 bool Operand::isMemory() const {
   return isExplicit() &&
          getExplicitOperandInfo().OperandType == MCOI::OPERAND_MEMORY;
@@ -115,6 +117,8 @@ Instruction::create(const MCInstrInfo &InstrInfo,
     Operand Operand;
     Operand.Index = OpIndex;
     Operand.IsDef = (OpIndex < Description->getNumDefs());
+    Operand.IsEarlyClobber =
+        (Description->getOperandConstraint(OpIndex, MCOI::EARLY_CLOBBER) != -1);
     // TODO(gchatelet): Handle isLookupPtrRegClass.
     if (OpInfo.RegClass >= 0)
       Operand.Tracker = &RATC.getRegisterClass(OpInfo.RegClass);
diff --git a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
index c1af10fa460a3..0a62967897c79 100644
--- a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
+++ b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
@@ -72,6 +72,7 @@ struct Operand {
   bool isVariable() const;
   bool isMemory() const;
   bool isImmediate() const;
+  bool isEarlyClobber() const;
   unsigned getIndex() const;
   unsigned getTiedToIndex() const;
   unsigned getVariableIndex() const;
@@ -82,6 +83,7 @@ struct Operand {
   // Please use the accessors above and not the following fields.
   std::optional<uint8_t> Index;
   bool IsDef = false;
+  bool IsEarlyClobber = false;
   const RegisterAliasingTracker *Tracker = nullptr; // Set for Register Op.
   const MCOperandInfo *Info = nullptr;              // Set for Explicit Op.
   std::optional<uint8_t> TiedToIndex;               // Set for Reg&Explicit Op.
@@ -115,6 +117,8 @@ struct Instruction {
   Instruction &operator=(const Instruction &) = delete;
   Instruction &operator=(Instruction &&) = delete;
+  unsigned getOpcode() const { return Description.getOpcode(); }
   // Returns the Operand linked to this Variable.
   // In case the Variable is tied, the primary (i.e. Def) Operand is returned.
   const Operand &getPrimaryOperand(const Variable &Var) const;
diff --git a/llvm/tools/llvm-exegesis/lib/RISCV/CMakeLists.txt b/llvm/tools/llvm-exegesis/lib/RISCV/CMakeLists.txt
index 489ac6d6e34b3..d379874fa1d0e 100644
--- a/llvm/tools/llvm-exegesis/lib/RISCV/CMakeLists.txt
+++ b/llvm/tools/llvm-exegesis/lib/RISCV/CMakeLists.txt
@@ -14,6 +14,8 @@ set(LLVM_LINK_COMPONENTS
+  RISCVExegesisPreprocessing.cpp
+  RISCVExegesisPostprocessing.cpp
diff --git a/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPasses.h b/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPasses.h
new file mode 100644
index 0000000000000..f206966331756
--- /dev/null
+++ b/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPasses.h
@@ -0,0 +1,19 @@
+//===- RISCVExegesisPasses.h - RISC-V specific Exegesis Passes --*- C++ -*-===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+namespace llvm {
+class FunctionPass; // Forward declaration; only pointers to it are used here.
+namespace exegesis {
+FunctionPass *createRISCVPreprocessingPass(); // Defined in RISCVExegesisPreprocessing.cpp.
+FunctionPass *createRISCVPostprocessingPass(); // Defined in RISCVExegesisPostprocessing.cpp.
+} // namespace exegesis
+} // namespace llvm
diff --git a/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp b/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp
new file mode 100644
index 0000000000000..e25cf04a01d9e
--- /dev/null
+++ b/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp
@@ -0,0 +1,130 @@
+//===- RISCVExegesisPostprocessing.cpp - Post processing MI for exegesis---===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// \file
+// Post-processing for exegesis: assign physical registers to the virtual
+// registers on VSETVL (its rd and AVL operands) and on FRM writes.
+#include "RISCV.h"
+#include "RISCVExegesisPasses.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+#define DEBUG_TYPE "riscv-exegesis-post-processing"
+namespace {
+struct RISCVExegesisPostprocessing : public MachineFunctionPass { // Replaces virtual GPRs on VSETVL / FRM-write instructions.
+  static char ID;
+  RISCVExegesisPostprocessing() : MachineFunctionPass(ID) {}
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG(); // Only register operands are rewritten.
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+  // Extremely simple register allocator: picks an allocatable register with
+  // no def or use in this function, falling back to X5 when none is free.
+  Register allocateGPRRegister(const MachineFunction &MF,
+                               const MachineRegisterInfo &MRI);
+  bool processVSETVL(MachineInstr &MI, MachineRegisterInfo &MRI); // True if a virtual register was replaced.
+  bool processWriteFRM(MachineInstr &MI, MachineRegisterInfo &MRI); // True if a virtual register was replaced.
+} // anonymous namespace
+char RISCVExegesisPostprocessing::ID = 0;
+bool RISCVExegesisPostprocessing::runOnMachineFunction(MachineFunction &MF) { // Returns true if any register was replaced.
+  bool Changed = false;
+  for (auto &MBB : MF)
+    for (auto &MI : MBB) {
+      unsigned Opcode = MI.getOpcode();
+      switch (Opcode) { // Only VSETVL-family and FRM-write opcodes are handled.
+      case RISCV::VSETVLI:
+      case RISCV::VSETVL:
+      case RISCV::PseudoVSETVLI:
+      case RISCV::PseudoVSETVLIX0:
+        Changed |= processVSETVL(MI, MF.getRegInfo());
+        break;
+      case RISCV::SwapFRMImm:
+      case RISCV::WriteFRM:
+        Changed |= processWriteFRM(MI, MF.getRegInfo());
+        break;
+      default: // Every other instruction is left untouched.
+        break;
+      }
+    }
+  if (Changed) // Virtual registers were replaced above, so clear MRI's virtual register state.
+    MF.getRegInfo().clearVirtRegs();
+  LLVM_DEBUG(MF.print(dbgs() << "===After RISCVExegesisPostprocessing===\n");
+             dbgs() << "\n");
+  return Changed;
+Register RISCVExegesisPostprocessing::allocateGPRRegister( // Picks a register from GPRJALR that is unused in MF.
+    const MachineFunction &MF, const MachineRegisterInfo &MRI) {
+  const auto &TRI = *MRI.getTargetRegisterInfo();
+  const TargetRegisterClass *GPRClass =
+      TRI.getRegClass(RISCV::GPRJALRRegClassID);
+  BitVector Candidates = TRI.getAllocatableSet(MF, GPRClass);
+  for (unsigned SetIdx : Candidates.set_bits()) {
+    if (MRI.reg_empty(Register(SetIdx))) // No def or use recorded for this register.
+      return Register(SetIdx);
+  }
+  // No unused candidate was found; fall back to a fixed register.
+  return RISCV::X5;
+bool RISCVExegesisPostprocessing::processVSETVL(MachineInstr &MI,
+                                                MachineRegisterInfo &MRI) {
+  bool Changed = false;
+  // Replace both the VL (i.e. the result) and AVL operands with physical
+  // registers when they are still virtual.
+  for (unsigned Idx = 0U; Idx < 2; ++Idx) // Operands 0 and 1 only.
+    if (MI.getOperand(Idx).isReg()) {
+      Register RegOp = MI.getOperand(Idx).getReg();
+      if (RegOp.isVirtual()) {
+        MRI.replaceRegWith(RegOp, allocateGPRRegister(*MI.getMF(), MRI)); // Rewrites every reference to RegOp, not just this operand.
+        Changed = true;
+      }
+    }
+  return Changed;
+bool RISCVExegesisPostprocessing::processWriteFRM(MachineInstr &MI,
+                                                  MachineRegisterInfo &MRI) {
+  // In both SwapFRMImm and WriteFRM the register that may still be virtual
+  // is operand 0.
+  if (MI.getOperand(0).isReg()) {
+    Register DestReg = MI.getOperand(0).getReg();
+    if (DestReg.isVirtual()) {
+      MRI.replaceRegWith(DestReg, allocateGPRRegister(*MI.getMF(), MRI));
+      return true;
+    }
+  }
+  return false; // Operand 0 was not a virtual register; nothing changed.
+FunctionPass *llvm::exegesis::createRISCVPostprocessingPass() { // Factory for the post-processing pass.
+  return new RISCVExegesisPostprocessing();
diff --git a/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPreprocessing.cpp b/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPreprocessing.cpp
new file mode 100644
index 0000000000000..7f1cfd9ea52df
--- /dev/null
+++ b/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPreprocessing.cpp
@@ -0,0 +1,85 @@
+//===- RISCVExegesisPreprocessing.cpp -------------------------------------===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// \file
+#include "RISCV.h"
+#include "RISCVExegesisPasses.h"
+#include "RISCVRegisterInfo.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+using namespace llvm;
+#define DEBUG_TYPE "riscv-exegesis-preprocessing"
+namespace {
+struct RISCVExegesisPreprocessing : public MachineFunctionPass { // Turns physical AVL registers into virtual ones.
+  static char ID;
+  RISCVExegesisPreprocessing() : MachineFunctionPass(ID) {}
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG(); // Only register operands are rewritten.
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+} // anonymous namespace
+char RISCVExegesisPreprocessing::ID = 0;
+static bool processAVLOperand(MachineInstr &MI, MachineRegisterInfo &MRI,
+                              const TargetInstrInfo &TII) {
+  const MCInstrDesc &Desc = TII.get(MI.getOpcode());
+  uint64_t TSFlags = Desc.TSFlags;
+  if (!RISCVII::hasVLOp(TSFlags)) // Nothing to do without a VL operand.
+    return false;
+  const MachineOperand &VLOp = MI.getOperand(RISCVII::getVLOpNum(Desc));
+  if (VLOp.isReg()) {
+    Register VLReg = VLOp.getReg();
+    if (VLReg.isVirtual()) // Already virtual; leave it alone.
+      return false;
+    assert(RISCV::GPRRegClass.contains(VLReg));
+    // Replace all uses of the original physical register with a new virtual
+    // register. This replacement is only safe because it's almost certain
+    // that VLReg has a single definition.
+    Register NewVLReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+    MRI.replaceRegWith(VLReg, NewVLReg);
+    return true;
+  }
+  return false; // The VL operand is not a register, so there is nothing to rewrite.
+// Runs processAVLOperand on every instruction of MF and returns true if any
+// of them was changed.
+bool RISCVExegesisPreprocessing::runOnMachineFunction(MachineFunction &MF) {
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const auto &STI = MF.getSubtarget<RISCVSubtarget>();
+  // Nothing to rewrite when the subtarget has no vector instructions.
+  if (!STI.hasVInstructions())
+    return false;
+  const TargetInstrInfo &TII = *STI.getInstrInfo();
+  LLVM_DEBUG(MF.print(dbgs() << "===Before RISCVExegesisPreprocessing===\n");
+             dbgs() << "\n");
+  bool Changed = false;
+  for (auto &MBB : MF)
+    for (auto &MI : MBB) {
+      Changed |= processAVLOperand(MI, MRI, TII);
+    }
+  return Changed;
+FunctionPass *llvm::exegesis::createRISCVPreprocessingPass() { // Factory for the pre-processing pass.
+  return new RISCVExegesisPreprocessing();
diff --git a/llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp b/llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp
index d70f609c5e080..c66ca6b86fb86 100644
--- a/llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp
@@ -7,11 +7,19 @@
 #include "../Target.h"
+#include "../ParallelSnippetGenerator.h"
+#include "../SerialSnippetGenerator.h"
+#include "../SnippetGenerator.h"
 #include "MCTargetDesc/RISCVBaseInfo.h"
 #include "MCTargetDesc/RISCVMCTargetDesc.h"
 #include "MCTargetDesc/RISCVMatInt.h"
+#include "RISCV.h"
+#include "RISCVExegesisPasses.h"
 #include "RISCVInstrInfo.h"
+#include "RISCVRegisterInfo.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
 // include computeAvailableFeatures and computeRequiredFeatures.
@@ -24,10 +32,584 @@
 namespace llvm {
 namespace exegesis {
+static cl::opt<bool>
+    OnlyUsesVLMAXForVL("riscv-vlmax-for-vl",
+                       cl::desc("Only enumerate VLMAX for VL operand"),
+                       cl::init(false), cl::Hidden);
+static cl::opt<bool>
+    EnumerateRoundingModes("riscv-enumerate-rounding-modes",
+                           cl::desc("Enumerate different FRM and VXRM"),
+                           cl::init(true), cl::Hidden);
+static cl::opt<std::string>
+    FilterConfig("riscv-filter-config",
+                 cl::desc("Show only the configs matching this regex"),
+                 cl::init(""), cl::Hidden);
 #include "RISCVGenExegesis.inc"
 namespace {
+template <class BaseT> class RISCVSnippetGenerator : public BaseT {
+  static void printRoundingMode(raw_ostream &OS, unsigned Val, bool UsesVXRM) {
+    if (UsesVXRM) {
+      assert(RISCVVXRndMode::isValidRoundingMode(Val));
+      OS << RISCVVXRndMode::roundingModeToString(
+          static_cast<RISCVVXRndMode::RoundingMode>(Val));
+    } else {
+      assert(RISCVFPRndMode::isValidRoundingMode(Val));
+      OS << RISCVFPRndMode::roundingModeToString(
+          static_cast<RISCVFPRndMode::RoundingMode>(Val));
+    }
+  }
+  static constexpr unsigned MinSEW = 8;
+  // ELEN is basically SEW_max.
+  unsigned ELEN = 64;
+  // We can't know the real min/max VLEN w/o a Function, so we're
+  // using the VLen from Zvl.
+  unsigned ZvlVLen = 32;
+  /// Mask for registers that are NOT standalone registers like X0 and V0
+  BitVector AggregateRegisters;
+  // Returns true when opcode is available in any of the FBs.
+  static bool
+  isOpcodeAvailableIn(unsigned Opcode,
+                      ArrayRef<RISCV_MC::SubtargetFeatureBits> FBs) {
+    FeatureBitset RequiredFeatures = RISCV_MC::computeRequiredFeatures(Opcode);
+    for (uint8_t FB : FBs) {
+      if (RequiredFeatures[FB])
+        return true;
+    }
+    return false;
+  }
+  static bool isRVVFloatingPointOp(unsigned Opcode) {
+    return isOpcodeAvailableIn(Opcode,
+                               {RISCV_MC::Feature_HasVInstructionsAnyFBit});
+  }
+  // Get the element group width of each vector crypto extension.
+  static unsigned getZvkEGWSize(unsigned Opcode, unsigned SEW) {
+    using namespace RISCV_MC;
+    if (isOpcodeAvailableIn(Opcode, {Feature_HasStdExtZvkgBit,
+                                     Feature_HasStdExtZvknedBit,
+                                     Feature_HasStdExtZvksedBit}))
+      return 128U;
+    if (isOpcodeAvailableIn(Opcode, {Feature_HasStdExtZvkshBit}))
+      return 256U;
+    if (isOpcodeAvailableIn(Opcode, {Feature_HasStdExtZvknhaOrZvknhbBit}))
+      // In Zvknh[ab], when SEW=64 is used (i.e. Zvknhb), EGW is 256.
+      // Otherwise it's 128.
+      return SEW == 64 ? 256U : 128U;
+    llvm_unreachable("Unsupported opcode");
+  }
+  // A handy utility to multiply or divide an integer by LMUL.
+  template <typename T> static T multiplyLMul(T Val, RISCVVType::VLMUL VLMul) {
+    auto [LMul, IsFractional] = RISCVVType::decodeVLMUL(VLMul);
+    return IsFractional ? Val / LMul : Val * LMul;
+  }
+  /// Return the denominator of the fraction (i.e. the `x` in the `.vfx`
+  /// suffix), or nullopt if BaseOpcode is not a vector sext/zext.
+  static std::optional<unsigned> isRVVSignZeroExtend(unsigned BaseOpcode) {
+    switch (BaseOpcode) {
+    case RISCV::VSEXT_VF2:
+    case RISCV::VZEXT_VF2:
+      return 2;
+    case RISCV::VSEXT_VF4:
+    case RISCV::VZEXT_VF4:
+      return 4;
+    case RISCV::VSEXT_VF8:
+    case RISCV::VZEXT_VF8:
+      return 8;
+    default:
+      return std::nullopt;
+    }
+  }
+  void annotateWithVType(const CodeTemplate &CT, const Instruction &Instr,
+                         unsigned BaseOpcode,
+                         const BitVector &ForbiddenRegisters,
+                         std::vector<CodeTemplate> &Result) const;
+  RISCVSnippetGenerator(const LLVMState &State,
+                        const SnippetGenerator::Options &Opts)
+      : BaseT(State, Opts),
+        AggregateRegisters(State.getRegInfo().getNumRegs(), /*initVal=*/true) {
+    // Initialize standalone registers mask.
+    const MCRegisterInfo &RegInfo = State.getRegInfo();
+    const unsigned StandaloneRegClasses[] = {
+        RISCV::GPRRegClassID, RISCV::FPR16RegClassID, RISCV::VRRegClassID};
+    for (unsigned RegClassID : StandaloneRegClasses)
+      for (unsigned Reg : RegInfo.getRegClass(RegClassID)) {
+        AggregateRegisters.reset(Reg);
+      }
+    // Initialize ELEN and VLEN.
+    // FIXME: We could have obtained these two from RISCVSubtarget
+    // but in order to get that from TargetMachine, we need a Function.
+    const Triple &TT = State.getTargetMachine().getTargetTriple();
+    ELEN = TT.isRISCV32() ? 32 : 64;
+    const MCSubtargetInfo &STI = State.getSubtargetInfo();
+    std::string ZvlQuery;
+    for (unsigned I = 5U, Size = (1 << I); I < 17U; ++I, Size <<= 1) {
+      ZvlQuery = "+zvl";
+      raw_string_ostream SS(ZvlQuery);
+      SS << Size << "b";
+      if (STI.checkFeatures(SS.str()) && ZvlVLen < Size)
+        ZvlVLen = Size;
+    }
+  }
+  Expected<std::vector<CodeTemplate>>
+  generateCodeTemplates(InstructionTemplate Variant,
+                        const BitVector &ForbiddenRegisters) const override;
+static bool isMaskedSibiling(unsigned MaskedOp, unsigned UnmaskedOp) {
+  const auto *RVVMasked = RISCV::getMaskedPseudoInfo(MaskedOp);
+  return RVVMasked && RVVMasked->UnmaskedPseudo == UnmaskedOp;
+// There are primarily two kinds of opcodes that are not eligible
+// in a serial snippet:
+// (1) Only has a single use operand that cannot overlap with
+// the def operand.
+// (2) The register file of the only use operand is different from
+// that of the def operand. For instance, use operand is vector and
+// the result is a scalar.
+static bool isIneligibleOfSerialSnippets(unsigned BaseOpcode,
+                                         const Instruction &I) {
+  if (llvm::any_of(I.Operands,
+                   [](const Operand &Op) { return Op.isEarlyClobber(); }))
+    return true;
+  switch (BaseOpcode) {
+  case RISCV::VCPOP_M:
+  case RISCV::VCPOP_V:
+  // The truncate instructions that arrive here are those that cannot
+  // have any overlap between source and dest at all (i.e.
+  // those that don't satisfy conditions 2 and 3 in RVV spec
+  // 5.2).
+    return true;
+  default:
+    return false;
+  }
+static bool isZvfhminZvfbfminOpcodes(unsigned BaseOpcode) {
+  switch (BaseOpcode) {
+  case RISCV::VFNCVT_F_F_W:
+  case RISCV::VFWCVT_F_F_V:
+  case RISCV::VFNCVTBF16_F_F_W:
+  case RISCV::VFWCVTBF16_F_F_V:
+    return true;
+  default:
+    return false;
+  }
+static bool isVectorReduction(unsigned BaseOpcode) {
+  switch (BaseOpcode) {
+    return true;
+  default:
+    return false;
+  }
+template <class BaseT>
+void RISCVSnippetGenerator<BaseT>::annotateWithVType(
+    const CodeTemplate &OrigCT, const Instruction &Instr, unsigned BaseOpcode,
+    const BitVector &ForbiddenRegisters,
+    std::vector<CodeTemplate> &Result) const {
+  const MCSubtargetInfo &STI = SnippetGenerator::State.getSubtargetInfo();
+  unsigned VPseudoOpcode = Instr.getOpcode();
+  bool IsSerial = std::is_same_v<BaseT, SerialSnippetGenerator>;
+  const MCInstrDesc &MIDesc = Instr.Description;
+  const uint64_t TSFlags = MIDesc.TSFlags;
+  RISCVVType::VLMUL VLMul = RISCVII::getLMul(TSFlags);
+  const size_t StartingResultSize = Result.size();
+  SmallPtrSet<const Operand *, 4> VTypeOperands;
+  std::optional<AliasingConfigurations> SelfAliasing;
+  // Exegesis sees instructions with tied operands as inherently serial.
+  // But for RVV instructions, those tied operands are passthru rather
+  // than real read operands. So we manually put a dependency between the
+  // destination (i.e. def) and any use operand that is not a tied, SEW,
+  // policy, AVL, or RM operand.
+  auto assignSerialRVVOperands = [&, this](InstructionTemplate &IT) {
+    // Initialize SelfAliasing on first use.
+    if (!SelfAliasing.has_value()) {
+      BitVector ExcludeRegs = ForbiddenRegisters;
+      ExcludeRegs |= AggregateRegisters;
+      SelfAliasing = AliasingConfigurations(Instr, Instr, ExcludeRegs);
+      bool EmptyUses = false;
+      for (auto &ARO : SelfAliasing->Configurations) {
+        auto &Uses = ARO.Uses;
+        for (auto ROA = Uses.begin(); ROA != Uses.end();) {
+          const Operand *Op = ROA->Op;
+          // Exclude tied operand(s).
+          if (Op->isTied()) {
+            ROA = Uses.erase(ROA);
+            continue;
+          }
+          // Special handling for reduction operations: for a given reduction
+          // `vredop vd, vs2, vs1`, we don't want vd to be aliased with vs1
+          // since we're only reading `vs1[0]` and many implementations
+          // optimize for this case (e.g. chaining). Instead, we're forcing
+          // it to create alias between vd and vs2.
+          if (isVectorReduction(BaseOpcode) &&
+              // vs1's operand index is always 3.
+              Op->getIndex() == 3) {
+            ROA = Uses.erase(ROA);
+            continue;
+          }
+          // Exclude any special operands like SEW and VL -- we've already
+          // assigned values to them.
+          if (VTypeOperands.count(Op)) {
+            ROA = Uses.erase(ROA);
+            continue;
+          }
+          ++ROA;
+        }
+        // If any of the use operand candidate lists is empty, there is
+        // no point in assigning self-aliasing registers.
+        if (Uses.empty()) {
+          EmptyUses = true;
+          break;
+        }
+      }
+      if (EmptyUses)
+        SelfAliasing->Configurations.clear();
+    }
+    // This is a self aliasing instruction so defs and uses are from the same
+    // instance, hence twice IT in the following call.
+    if (!SelfAliasing->empty() && !SelfAliasing->hasImplicitAliasing())
+      setRandomAliasing(*SelfAliasing, IT, IT);
+  };
+  // We are going to create a CodeTemplate (configuration) for each supported
+  // SEW, policy, and VL.
+  // FIXME: Account for EEW and EMUL.
+  SmallVector<std::optional<unsigned>, 4> Log2SEWs;
+  SmallVector<std::optional<unsigned>, 4> Policies;
+  SmallVector<std::optional<int>, 3> AVLs;
+  SmallVector<std::optional<unsigned>, 8> RoundingModes;
+  bool HasSEWOp = RISCVII::hasSEWOp(TSFlags);
+  bool HasPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
+  bool HasVLOp = RISCVII::hasVLOp(TSFlags);
+  bool HasRMOp = RISCVII::hasRoundModeOp(TSFlags);
+  bool UsesVXRM = RISCVII::usesVXRM(TSFlags);
+  if (HasSEWOp) {
+    const Operand &SEWOp = Instr.Operands[RISCVII::getSEWOpNum(MIDesc)];
+    VTypeOperands.insert(&SEWOp);
+    if (SEWOp.Info->OperandType == RISCVOp::OPERAND_SEW_MASK) {
+      // If it's a mask-producing instruction, the SEW operand is always zero.
+      Log2SEWs.push_back(0);
+    } else {
+      SmallVector<unsigned, 4> SEWCandidates;
+      // (RVV spec 3.4.2) For fractional LMUL, the supported SEWs are in the
+      // range [SEW_min, LMUL * ELEN].
+      unsigned SEWUpperBound =
+          VLMul >= RISCVVType::LMUL_F8 ? multiplyLMul(ELEN, VLMul) : ELEN;
+      for (unsigned SEW = MinSEW; SEW <= SEWUpperBound; SEW <<= 1) {
+        SEWCandidates.push_back(SEW);
+        // Some scheduling classes already integrate SEW; only put
+        // their corresponding SEW values at the SEW operands.
+        // NOTE: It is imperative to put this condition in the front, otherwise
+        // it is tricky and difficult to know if there is an integrated
+        // SEW after other rules are applied to filter the candidates.
+        const auto *RVVBase =
+            RISCVVInversePseudosTable::getBaseInfo(BaseOpcode, VLMul, SEW);
+        if (RVVBase && (RVVBase->Pseudo == VPseudoOpcode ||
+                        isMaskedSibiling(VPseudoOpcode, RVVBase->Pseudo) ||
+                        isMaskedSibiling(RVVBase->Pseudo, VPseudoOpcode))) {
+          // There is an integrated SEW, remove all but the SEW pushed last.
+          SEWCandidates.erase(SEWCandidates.begin(), SEWCandidates.end() - 1);
+          break;
+        }
+      }
+      // Filter out some candidates.
+      for (auto SEW = SEWCandidates.begin(); SEW != SEWCandidates.end();) {
+        // For floating point operations, only select SEW of the supported FLEN.
+        if (isRVVFloatingPointOp(VPseudoOpcode)) {
+          bool Supported = false;
+          Supported |= isZvfhminZvfbfminOpcodes(BaseOpcode) && *SEW == 16;
+          Supported |= STI.hasFeature(RISCV::FeatureStdExtZvfh) && *SEW == 16;
+          Supported |= STI.hasFeature(RISCV::FeatureStdExtF) && *SEW == 32;
+          Supported |= STI.hasFeature(RISCV::FeatureStdExtD) && *SEW == 64;
+          if (!Supported) {
+            SEW = SEWCandidates.erase(SEW);
+            continue;
+          }
+        }
+        // The EEW for the source operand in VSEXT and VZEXT is a fraction
+        // of the SEW, hence only SEWs that lead to a valid EEW are allowed.
+        if (auto Frac = isRVVSignZeroExtend(BaseOpcode))
+          if (*SEW / *Frac < MinSEW) {
+            SEW = SEWCandidates.erase(SEW);
+            continue;
+          }
+        // Most vector crypto 1.0 instructions only work on SEW=32.
+        using namespace RISCV_MC;
+        if (isOpcodeAvailableIn(BaseOpcode, {Feature_HasStdExtZvkgBit,
+                                             Feature_HasStdExtZvknedBit,
+                                             Feature_HasStdExtZvknhaOrZvknhbBit,
+                                             Feature_HasStdExtZvksedBit,
+                                             Feature_HasStdExtZvkshBit})) {
+          if (*SEW != 32)
+            // Zvknhb supports SEW=64 as well.
+            if (*SEW != 64 || !STI.hasFeature(RISCV::FeatureStdExtZvknhb) ||
+                !isOpcodeAvailableIn(BaseOpcode,
+                                     {Feature_HasStdExtZvknhaOrZvknhbBit})) {
+              SEW = SEWCandidates.erase(SEW);
+              continue;
+            }
+          // We're also enforcing the requirement of `LMUL * VLEN >= EGW` here,
+          // because some of the extensions have a SEW-dependent EGW.
+          unsigned EGW = getZvkEGWSize(BaseOpcode, *SEW);
+          if (multiplyLMul(ZvlVLen, VLMul) < EGW) {
+            SEW = SEWCandidates.erase(SEW);
+            continue;
+          }
+        }
+        ++SEW;
+      }
+      // We're not going to produce any result with zero SEW candidate.
+      if (SEWCandidates.empty())
+        return;
+      for (unsigned SEW : SEWCandidates)
+        Log2SEWs.push_back(Log2_32(SEW));
+    }
+  } else {
+    Log2SEWs.push_back(std::nullopt);
+  }
+  if (HasPolicyOp) {
+    VTypeOperands.insert(&Instr.Operands[RISCVII::getVecPolicyOpNum(MIDesc)]);
+  } else {
+    Policies.push_back(std::nullopt);
+  }
+  if (HasVLOp) {
+    VTypeOperands.insert(&Instr.Operands[RISCVII::getVLOpNum(MIDesc)]);
+    if (OnlyUsesVLMAXForVL)
+      AVLs.push_back(-1);
+    else
+      AVLs = {// 5-bit immediate value
+              1,
+              // VLMAX
+              -1,
+              // Non-X0 register
+              0};
+  } else {
+    AVLs.push_back(std::nullopt);
+  }
+  if (HasRMOp) {
+    VTypeOperands.insert(&Instr.Operands[RISCVII::getVLOpNum(MIDesc) - 1]);
+    // If we're not enumerating all rounding modes,
+    // use zero (rne in FRM and rnu in VXRM) as the default
+    // mode.
+    RoundingModes = {0U};
+    if (EnumerateRoundingModes) {
+      RoundingModes.append({1, 2, 3});
+      if (!UsesVXRM)
+        // FRM values 5 and 6 are currently reserved.
+        RoundingModes.append({4, 7});
+    }
+  } else {
+    RoundingModes = {std::nullopt};
+  }
+  std::set<std::tuple<std::optional<unsigned>, std::optional<int>,
+                      std::optional<unsigned>, std::optional<unsigned>>>
+      Combinations;
+  for (auto AVL : AVLs) {
+    for (auto Log2SEW : Log2SEWs)
+      for (auto Policy : Policies) {
+        for (auto RM : RoundingModes)
+          Combinations.insert(std::make_tuple(RM, AVL, Log2SEW, Policy));
+      }
+  }
+  std::string ConfigStr;
+  SmallVector<std::pair<const Operand *, MCOperand>, 4> ValueAssignments;
+  for (const auto &[RM, AVL, Log2SEW, Policy] : Combinations) {
+    InstructionTemplate IT(&Instr);
+    ListSeparator LS;
+    ConfigStr = "vtype = {";
+    raw_string_ostream SS(ConfigStr);
+    ValueAssignments.clear();
+    if (RM) {
+      const Operand &Op = Instr.Operands[RISCVII::getVLOpNum(MIDesc) - 1];
+      ValueAssignments.push_back({&Op, MCOperand::createImm(*RM)});
+      printRoundingMode(SS << LS << (UsesVXRM ? "VXRM" : "FRM") << ": ", *RM,
+                        UsesVXRM);
+    }
+    if (AVL) {
+      MCOperand OpVal;
+      if (*AVL < 0) {
+        // VLMAX
+        OpVal = MCOperand::createImm(-1);
+        SS << LS << "AVL: VLMAX";
+      } else if (*AVL == 0) {
+        // A register holding AVL.
+        // TODO: Generate a random register.
+        OpVal = MCOperand::createReg(RISCV::X5);
+        OpVal.print(SS << LS << "AVL: ");
+      } else {
+        // A 5-bit immediate.
+        // The actual value assignment is deferred to
+        // RISCVExegesisTarget::randomizeTargetMCOperand.
+        SS << LS << "AVL: simm5";
+      }
+      if (OpVal.isValid()) {
+        const Operand &Op = Instr.Operands[RISCVII::getVLOpNum(MIDesc)];
+        ValueAssignments.push_back({&Op, OpVal});
+      }
+    }
+    if (Log2SEW) {
+      const Operand &Op = Instr.Operands[RISCVII::getSEWOpNum(MIDesc)];
+      ValueAssignments.push_back({&Op, MCOperand::createImm(*Log2SEW)});
+      SS << LS << "SEW: e" << (*Log2SEW ? 1 << *Log2SEW : 8);
+    }
+    if (Policy) {
+      const Operand &Op = Instr.Operands[RISCVII::getVecPolicyOpNum(MIDesc)];
+      ValueAssignments.push_back({&Op, MCOperand::createImm(*Policy)});
+      SS << LS
+         << "Policy: " << (*Policy & RISCVVType::TAIL_AGNOSTIC ? "ta" : "tu")
+         << "/" << (*Policy & RISCVVType::MASK_AGNOSTIC ? "ma" : "mu");
+    }
+    SS << "}";
+    // Filter out some configurations, if needed.
+    if (!FilterConfig.empty()) {
+      if (!Regex(FilterConfig).match(ConfigStr))
+        continue;
+    }
+    CodeTemplate CT = OrigCT.clone();
+    CT.Config = std::move(ConfigStr);
+    for (InstructionTemplate &IT : CT.Instructions) {
+      if (IsSerial) {
+        // Reset this template's value assignments and do it
+        // ourselves.
+        IT = InstructionTemplate(&Instr);
+        assignSerialRVVOperands(IT);
+      }
+      for (const auto &[Op, OpVal] : ValueAssignments)
+        IT.getValueFor(*Op) = OpVal;
+    }
+    Result.push_back(std::move(CT));
+    if (Result.size() - StartingResultSize >=
+        SnippetGenerator::Opts.MaxConfigsPerOpcode)
+      return;
+  }
+template <class BaseT>
+    InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const {
+  const Instruction &Instr = Variant.getInstr();
+  bool IsSerial = std::is_same_v<BaseT, SerialSnippetGenerator>;
+  unsigned BaseOpcode = RISCV::getRVVMCOpcode(Instr.getOpcode());
+  // Bail out ineligible opcodes before generating base code templates since
+  // the latter is quite expensive.
+  if (IsSerial && BaseOpcode && isIneligibleOfSerialSnippets(BaseOpcode, Instr))
+    return std::vector<CodeTemplate>{};
+  auto BaseCodeTemplates =
+      BaseT::generateCodeTemplates(Variant, ForbiddenRegisters);
+  if (!BaseCodeTemplates)
+    return BaseCodeTemplates.takeError();
+  if (!BaseOpcode)
+    return BaseCodeTemplates;
+  // Specialize for RVV pseudo.
+  std::vector<CodeTemplate> ExpandedTemplates;
+  for (const auto &BaseCT : *BaseCodeTemplates)
+    annotateWithVType(BaseCT, Instr, BaseOpcode, ForbiddenRegisters,
+                      ExpandedTemplates);
+  return ExpandedTemplates;
 // Stores constant value to a general-purpose (integer) register.
 static std::vector<MCInst> loadIntReg(const MCSubtargetInfo &STI,
                                       MCRegister Reg, const APInt &Value) {
@@ -75,30 +657,10 @@ static std::vector<MCInst> loadFP64RegBits32(const MCSubtargetInfo &STI,
   return Instrs;
-static MCInst nop() {
-  // ADDI X0, X0, 0
-  return MCInstBuilder(RISCV::ADDI)
-      .addReg(RISCV::X0)
-      .addReg(RISCV::X0)
-      .addImm(0);
-static bool isVectorRegList(MCRegister Reg) {
-  return RISCV::VRM2RegClass.contains(Reg) ||
-         RISCV::VRM4RegClass.contains(Reg) ||
-         RISCV::VRM8RegClass.contains(Reg) ||
-         RISCV::VRN2M1RegClass.contains(Reg) ||
-         RISCV::VRN2M2RegClass.contains(Reg) ||
-         RISCV::VRN2M4RegClass.contains(Reg) ||
-         RISCV::VRN3M1RegClass.contains(Reg) ||
-         RISCV::VRN3M2RegClass.contains(Reg) ||
-         RISCV::VRN4M1RegClass.contains(Reg) ||
-         RISCV::VRN4M2RegClass.contains(Reg) ||
-         RISCV::VRN5M1RegClass.contains(Reg) ||
-         RISCV::VRN6M1RegClass.contains(Reg) ||
-         RISCV::VRN7M1RegClass.contains(Reg) ||
-         RISCV::VRN8M1RegClass.contains(Reg);
+// NOTE: Alternatively, we can use BitVector here, but the number of RVV MC
+// opcodes is just a small portion of the entire opcode space, so I thought it
+// would be a waste of space to use BitVector.
+static SmallSet<unsigned, 16> RVVMCOpcodesWithPseudos;
 class ExegesisRISCVTarget : public ExegesisTarget {
@@ -109,6 +671,30 @@ class ExegesisRISCVTarget : public ExegesisTarget {
   std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, MCRegister Reg,
                                const APInt &Value) const override;
+  const char *getIgnoredOpcodeReasonOrNull(const LLVMState &State,
+                                           unsigned Opcode) const override {
+    // We don't want to support RVV instructions that depend on VTYPE, because
+    // those instructions by themselves don't carry any additional information
+    // for us to setup the proper VTYPE environment via VSETVL instructions.
+    // FIXME: Ideally, we should use RISCVVInversePseudosTable, but it requires
+    // LMUL and SEW and I don't think enumerating those combinations is any
+    // better than the ugly trick here that memorizes the corresponding MC
+    // opcodes of the RVV pseudo we have processed previously. This works most
+    // of the time because RVV pseudo opcodes are placed before any other RVV
+    // opcodes. Of course this doesn't work if we're asked to benchmark only a
+    // certain subset of opcodes.
+    if (RVVMCOpcodesWithPseudos.count(Opcode))
+      return "The MC opcode of RVV instructions are ignored";
+    // We want to support all RVV pseudos.
+    if (unsigned MCOpcode = RISCV::getRVVMCOpcode(Opcode)) {
+      RVVMCOpcodesWithPseudos.insert(MCOpcode);
+      return nullptr;
+    }
+    return ExegesisTarget::getIgnoredOpcodeReasonOrNull(State, Opcode);
+  }
   MCRegister getDefaultLoopCounterRegister(const Triple &) const override;
   void decrementLoopCounterAndJump(MachineBasicBlock &MBB,
@@ -131,9 +717,38 @@ class ExegesisRISCVTarget : public ExegesisTarget {
                                  MCOperand &AssignedValue,
                                  const BitVector &ForbiddenRegs) const override;
+  std::unique_ptr<SnippetGenerator> createSerialSnippetGenerator(
+      const LLVMState &State,
+      const SnippetGenerator::Options &Opts) const override {
+    return std::make_unique<RISCVSnippetGenerator<SerialSnippetGenerator>>(
+        State, Opts);
+  }
+  std::unique_ptr<SnippetGenerator> createParallelSnippetGenerator(
+      const LLVMState &State,
+      const SnippetGenerator::Options &Opts) const override {
+    return std::make_unique<RISCVSnippetGenerator<ParallelSnippetGenerator>>(
+        State, Opts);
+  }
   generateInstructionVariants(const Instruction &Instr,
                               unsigned MaxConfigsPerOpcode) const override;
+  void addTargetSpecificPasses(PassManagerBase &PM) const override {
+    // Turn AVL operands that are physical registers into virtual registers.
+    PM.add(exegesis::createRISCVPreprocessingPass());
+    PM.add(createRISCVInsertVSETVLIPass());
+    // Setting up the correct FRM.
+    PM.add(createRISCVInsertReadWriteCSRPass());
+    PM.add(createRISCVInsertWriteVXRMPass());
+    // This will assign physical register to the result of VSETVLI instructions
+    // that produce VLMAX.
+    PM.add(exegesis::createRISCVPostprocessingPass());
+    // PseudoRET will be expanded by RISCVAsmPrinter; we have to expand
+    // PseudoMovImm with RISCVPostRAExpandPseudoPass though.
+    PM.add(createRISCVPostRAExpandPseudoPass());
+  }
@@ -157,20 +772,7 @@ std::vector<MCInst> ExegesisRISCVTarget::setRegTo(const MCSubtargetInfo &STI,
       return loadFPRegBits(STI, Reg, Value, RISCV::FMV_D_X);
     return loadFP64RegBits32(STI, Reg, Value);
-  if (Reg == RISCV::FRM || Reg == RISCV::VL || Reg == RISCV::VLENB ||
-      Reg == RISCV::VTYPE || RISCV::GPRPairRegClass.contains(Reg) ||
-      RISCV::VRRegClass.contains(Reg) || isVectorRegList(Reg)) {
-    // Don't initialize:
-    // - FRM
-    // - VL, VLENB, VTYPE
-    // - vector registers (and vector register lists)
-    // - Zfinx registers
-    // Generate 'NOP' so that exegesis treats such registers as initialized
-    // (it tries to initialize them with '0' anyway).
-    return {nop()};
-  }
-  errs() << "setRegTo is not implemented for Reg " << Reg
-         << ", results will be unreliable\n";
+  // TODO: Emit proper code to initialize other kinds of registers.
   return {};
@@ -243,6 +845,15 @@ Error ExegesisRISCVTarget::randomizeTargetMCOperand(
     AssignedValue = MCOperand::createImm(1);
+    // 5-bit signed immediate value.
+    AssignedValue = MCOperand::createImm(randomIndex(31) - 16);
+    break;
+    // 5-bit unsigned immediate value.
+    AssignedValue = MCOperand::createImm(randomIndex(31));
+    break;
     if (OperandType >= RISCVOp::OPERAND_FIRST_RISCV_IMM &&
         OperandType <= RISCVOp::OPERAND_LAST_RISCV_IMM)
diff --git a/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp
index 25cdf1ce66d44..f233ea4288a7e 100644
--- a/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp
+++ b/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp
@@ -53,6 +53,8 @@ computeAliasingInstructions(const LLVMState &State, const Instruction *Instr,
     if (OtherOpcode == Instr->Description.getOpcode())
     const Instruction &OtherInstr = State.getIC().getInstr(OtherOpcode);
+    if (ET.getIgnoredOpcodeReasonOrNull(State, OtherInstr.getOpcode()))
+      continue;
     if (OtherInstr.hasMemoryOperands())
     if (!ET.allowAsBackToBack(OtherInstr))
diff --git a/llvm/tools/llvm-exegesis/lib/Target.cpp b/llvm/tools/llvm-exegesis/lib/Target.cpp
index 5ea5b4c2c002f..68d19514bedb2 100644
--- a/llvm/tools/llvm-exegesis/lib/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Target.cpp
@@ -35,6 +35,19 @@ const ExegesisTarget *ExegesisTarget::lookup(Triple TT) {
   return nullptr;
+const char *
+ExegesisTarget::getIgnoredOpcodeReasonOrNull(const LLVMState &State,
+                                             unsigned Opcode) const {
+  const MCInstrDesc &InstrDesc = State.getIC().getInstr(Opcode).Description;
+  if (InstrDesc.isPseudo() || InstrDesc.usesCustomInsertionHook())
+    return "Unsupported opcode: isPseudo/usesCustomInserter";
+  if (InstrDesc.isBranch() || InstrDesc.isIndirectBranch())
+    return "Unsupported opcode: isBranch/isIndirectBranch";
+  if (InstrDesc.isCall() || InstrDesc.isReturn())
+    return "Unsupported opcode: isCall/isReturn";
+  return nullptr;
 ExegesisTarget::createCounter(StringRef CounterName, const LLVMState &,
                               ArrayRef<const char *> ValidationCounters,
diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h
index f3fbe3780616f..77fbaa6e95412 100644
--- a/llvm/tools/llvm-exegesis/lib/Target.h
+++ b/llvm/tools/llvm-exegesis/lib/Target.h
@@ -154,6 +154,9 @@ class ExegesisTarget {
     return IsOpcodeAvailable(Opcode, Features);
+  virtual const char *getIgnoredOpcodeReasonOrNull(const LLVMState &State,
+                                                   unsigned Opcode) const;
   // Sets the stack register to the auxiliary memory so that operations
   // requiring the stack can be formed (e.g., setting large registers). The code
   // generated by this function may clobber registers.
diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
index b9938a92855a4..babcffeb9666a 100644
--- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
+++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
@@ -300,18 +300,6 @@ T ExitOnFileError(const Twine &FileName, Expected<T> &&E) {
   return std::move(*E);
-static const char *getIgnoredOpcodeReasonOrNull(const LLVMState &State,
-                                                unsigned Opcode) {
-  const MCInstrDesc &InstrDesc = State.getIC().getInstr(Opcode).Description;
-  if (InstrDesc.isPseudo() || InstrDesc.usesCustomInsertionHook())
-    return "Unsupported opcode: isPseudo/usesCustomInserter";
-  if (InstrDesc.isBranch() || InstrDesc.isIndirectBranch())
-    return "Unsupported opcode: isBranch/isIndirectBranch";
-  if (InstrDesc.isCall() || InstrDesc.isReturn())
-    return "Unsupported opcode: isCall/isReturn";
-  return nullptr;
 // Checks that only one of OpcodeNames, OpcodeIndex or SnippetsFile is provided,
 // and returns the opcode indices or {} if snippets should be read from
 // `SnippetsFile`.
@@ -370,7 +358,8 @@ static Expected<std::vector<BenchmarkCode>>
 generateSnippets(const LLVMState &State, unsigned Opcode,
                  const BitVector &ForbiddenRegs) {
   // Ignore instructions that we cannot run.
-  if (const char *Reason = getIgnoredOpcodeReasonOrNull(State, Opcode))
+  if (const char *Reason =
+          State.getExegesisTarget().getIgnoredOpcodeReasonOrNull(State, Opcode))
     return make_error<Failure>(Reason);
   const Instruction &Instr = State.getIC().getInstr(Opcode);

>From 0c4d94340c97988366d894721f6606fca689a5b3 Mon Sep 17 00:00:00 2001
From: Min Hsu <min.hsu at sifive.com>
Date: Tue, 25 Feb 2025 15:55:46 -0800
Subject: [PATCH 2/4] fixup! Change the test check prefix

 .../tools/llvm-exegesis/RISCV/rvv/skip-rm.test   | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/skip-rm.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/skip-rm.test
index a3af37149eeb5..b9a90b14bb26f 100644
--- a/llvm/test/tools/llvm-exegesis/RISCV/rvv/skip-rm.test
+++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/skip-rm.test
@@ -1,12 +1,12 @@
 # RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVAADDU_VV_M1 \
-# RUN:    --riscv-enumerate-rounding-modes=false --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=VXRM
+# RUN:    --riscv-enumerate-rounding-modes=false --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=VX
 # RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFADD_VFPR16_M1_E16 \
-# RUN:    --riscv-enumerate-rounding-modes=false --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=FRM
+# RUN:    --riscv-enumerate-rounding-modes=false --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=FP
-# VXRM: VXRM: rnu
-# VXRM-NOT: VXRM: {{(rne|rdn|rod)}}
+# VX: PseudoVAADDU_VV_M1
+# VX: VXRM: rnu
+# VX-NOT: VXRM: {{(rne|rdn|rod)}}
-# FRM: PseudoVFADD_VFPR16_M1_E16
-# FRM: FRM: rne
-# FRM-NOT: FRM: {{(rtz|rdn|rup|rmm|dyn)}}
+# FP: PseudoVFADD_VFPR16_M1_E16
+# FP: FRM: rne
+# FP-NOT: FRM: {{(rtz|rdn|rup|rmm|dyn)}}

>From bd1b6f856ad8101c59dda19f5f1b2ad04be63da8 Mon Sep 17 00:00:00 2001
From: Min Hsu <min.hsu at sifive.com>
Date: Wed, 26 Feb 2025 13:51:06 -0800
Subject: [PATCH 3/4] Address review comments

 .../RISCV/rvv/eligible-inst.test              | 57 +++++++++++++++++--
 .../lib/RISCV/RISCVExegesisPostprocessing.cpp |  5 +-
 llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp | 36 +++++-------
 3 files changed, 69 insertions(+), 29 deletions(-)

diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/eligible-inst.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/eligible-inst.test
index 189adf2c1b334..d5fdbc17192aa 100644
--- a/llvm/test/tools/llvm-exegesis/RISCV/rvv/eligible-inst.test
+++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/eligible-inst.test
@@ -1,10 +1,59 @@
 # RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
-# RUN:    --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 | FileCheck %s --allow-empty --check-prefix=LATENCY
+# RUN:    --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 | \
+# RUN:    FileCheck %s --allow-empty --check-prefix=LATENCY
 # RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
-# RUN:    --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 --min-instructions=100 | FileCheck %s --check-prefix=RTHROUGHPUT
+# RUN:    --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 \
+# RUN:    --min-instructions=100 | \
+# RUN:    FileCheck %s --check-prefix=RTHROUGHPUT1
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
+# RUN:    --opcode-name=PseudoVRGATHEREI16_VV_M2_E32_M1,PseudoVRGATHER_VI_M2,PseudoVRGATHER_VV_M8_E32,PseudoVRGATHER_VX_M4 | \
+# RUN:    FileCheck %s --allow-empty --check-prefix=LATENCY
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
+# RUN:    --opcode-name=PseudoVRGATHEREI16_VV_M2_E32_M1,PseudoVRGATHER_VI_M2,PseudoVRGATHER_VV_M8_E32,PseudoVRGATHER_VX_M4 \
+# RUN:    --min-instructions=100 | \
+# RUN:    FileCheck %s --check-prefix=RTHROUGHPUT2
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
+# RUN:    --opcode-name=PseudoVSLIDE1UP_VX_M1,PseudoVSLIDEUP_VI_M2,PseudoVSLIDEUP_VX_M2 | \
+# RUN:    FileCheck %s --allow-empty --check-prefix=LATENCY
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
+# RUN:    --opcode-name=PseudoVSLIDE1UP_VX_M1,PseudoVSLIDEUP_VI_M2,PseudoVSLIDEUP_VX_M2 \
+# RUN:    --min-instructions=100 | \
+# RUN:    FileCheck %s --check-prefix=RTHROUGHPUT3
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
+# RUN:    --opcode-name=PseudoVNCLIPU_WI_M2,PseudoVNSRA_WI_M2,PseudoVNSRL_WI_M2 | \
+# RUN:    FileCheck %s --allow-empty --check-prefix=LATENCY
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
+# RUN:    --opcode-name=PseudoVNCLIPU_WI_M2,PseudoVNSRA_WI_M2,PseudoVNSRL_WI_M2 \
+# RUN:    --min-instructions=100 | \
+# RUN:    FileCheck %s --check-prefix=RTHROUGHPUT4
+# These instructions are only eligible under the inverse throughput mode.
diff --git a/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp b/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp
index e25cf04a01d9e..fbb3d73a88c72 100644
--- a/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp
+++ b/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp
@@ -81,8 +81,9 @@ Register RISCVExegesisPostprocessing::allocateGPRRegister(
     const MachineFunction &MF, const MachineRegisterInfo &MRI) {
   const auto &TRI = *MRI.getTargetRegisterInfo();
-  const TargetRegisterClass *GPRClass =
-      TRI.getRegClass(RISCV::GPRJALRRegClassID);
+  // We want to avoid allocating callee-saved registers, and GPRTC
+  // happens to cover nearly all caller-saved registers.
+  const TargetRegisterClass *GPRClass = TRI.getRegClass(RISCV::GPRTCRegClassID);
   BitVector Candidates = TRI.getAllocatableSet(MF, GPRClass);
   for (unsigned SetIdx : Candidates.set_bits()) {
diff --git a/llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp b/llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp
index c66ca6b86fb86..84448cf9c3f85 100644
--- a/llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp
@@ -149,17 +149,15 @@ template <class BaseT> class RISCVSnippetGenerator : public BaseT {
         RISCV::GPRRegClassID, RISCV::FPR16RegClassID, RISCV::VRRegClassID};
     for (unsigned RegClassID : StandaloneRegClasses)
-      for (unsigned Reg : RegInfo.getRegClass(RegClassID)) {
+      for (unsigned Reg : RegInfo.getRegClass(RegClassID))
-      }
     // Initialize ELEN and VLEN.
-    // FIXME: We could have obtained these two from RISCVSubtarget
+    // FIXME: We could have obtained these two constants from RISCVSubtarget
     // but in order to get that from TargetMachine, we need a Function.
-    const Triple &TT = State.getTargetMachine().getTargetTriple();
-    ELEN = TT.isRISCV32() ? 32 : 64;
     const MCSubtargetInfo &STI = State.getSubtargetInfo();
+    ELEN = STI.checkFeatures("+zve64x") ? 64 : 32;
     std::string ZvlQuery;
     for (unsigned I = 5U, Size = (1 << I); I < 17U; ++I, Size <<= 1) {
       ZvlQuery = "+zvl";
@@ -175,15 +173,15 @@ template <class BaseT> class RISCVSnippetGenerator : public BaseT {
                         const BitVector &ForbiddenRegisters) const override;
-static bool isMaskedSibiling(unsigned MaskedOp, unsigned UnmaskedOp) {
+static bool isMaskedSibling(unsigned MaskedOp, unsigned UnmaskedOp) {
   const auto *RVVMasked = RISCV::getMaskedPseudoInfo(MaskedOp);
   return RVVMasked && RVVMasked->UnmaskedPseudo == UnmaskedOp;
 // There are primarily two kinds of opcodes that are not eligible
 // in a serial snippet:
-// (1) Only has a single use operand that can not be overlap with
-// the def operand.
+// (1) Has a use operand that cannot overlap with the def operand
+// (i.e. early clobber).
 // (2) The register file of the only use operand is different from
 // that of the def operand. For instance, use operand is vector and
 // the result is a scalar.
@@ -197,6 +195,8 @@ static bool isIneligibleOfSerialSnippets(unsigned BaseOpcode,
   case RISCV::VCPOP_M:
   case RISCV::VCPOP_V:
+  // The permutation instructions listed below cannot have their
+  // destination overlap with the source.
@@ -204,16 +204,6 @@ static bool isIneligibleOfSerialSnippets(unsigned BaseOpcode,
-  // The truncate instructions that arraive here are those who cannot
-  // have any overlap between source and dest at all (i.e.
-  // those whoe don't satisfy condition 2 and 3 in RVV spec
-  // 5.2).
     return true;
     return false;
@@ -372,8 +362,8 @@ void RISCVSnippetGenerator<BaseT>::annotateWithVType(
         const auto *RVVBase =
             RISCVVInversePseudosTable::getBaseInfo(BaseOpcode, VLMul, SEW);
         if (RVVBase && (RVVBase->Pseudo == VPseudoOpcode ||
-                        isMaskedSibiling(VPseudoOpcode, RVVBase->Pseudo) ||
-                        isMaskedSibiling(RVVBase->Pseudo, VPseudoOpcode))) {
+                        isMaskedSibling(VPseudoOpcode, RVVBase->Pseudo) ||
+                        isMaskedSibling(RVVBase->Pseudo, VPseudoOpcode))) {
           // There is an integrated SEW, remove all but the SEW pushed last.
           SEWCandidates.erase(SEWCandidates.begin(), SEWCandidates.end() - 1);
@@ -395,7 +385,7 @@ void RISCVSnippetGenerator<BaseT>::annotateWithVType(
-        // The EEW for source operand in VSEXT and VZEXT is a fractional
+        // The EEW for source operand in VSEXT and VZEXT is a fraction
         // of the SEW, hence only SEWs that will lead to valid EEW are allowed.
         if (auto Frac = isRVVSignZeroExtend(BaseOpcode))
           if (*SEW / *Frac < MinSEW) {
@@ -411,7 +401,7 @@ void RISCVSnippetGenerator<BaseT>::annotateWithVType(
                                              Feature_HasStdExtZvkshBit})) {
           if (*SEW != 32)
-            // Zvknhb support SEW=64 as well.
+            // Zvknhb supports SEW=64 as well.
             if (*SEW != 64 || !STI.hasFeature(RISCV::FeatureStdExtZvknhb) ||
                                      {Feature_HasStdExtZvknhaOrZvknhbBit})) {

>From 32cb8fe11a043de10d9c7fc0495a5e040538f1a2 Mon Sep 17 00:00:00 2001
From: Min Hsu <min.hsu at sifive.com>
Date: Wed, 26 Feb 2025 14:03:48 -0800
Subject: [PATCH 4/4] fixup! Address review comments

 .../llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp   | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp b/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp
index fbb3d73a88c72..c818f51806076 100644
--- a/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp
+++ b/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp
@@ -6,8 +6,8 @@
 // \file
-// Currently there is only one post-processing we need to do for exegesis:
-// Assign a physical register to VSETVL's rd if it's not X0 (i.e. VLMAX).
+// This pass converts some of the virtual register operands in VSETVLI and FRM
+// pseudos into physical registers.

More information about the llvm-commits mailing list