[llvm] [Exegesis][RISCV] Add initial RVV support (PR #128767)
Min-Yih Hsu via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 26 14:04:06 PST 2025
https://github.com/mshockwave updated https://github.com/llvm/llvm-project/pull/128767
>From a0651b5b9b3af5beb0cce58e5d5c5d147521e918 Mon Sep 17 00:00:00 2001
From: Min Hsu <min.hsu at sifive.com>
Date: Mon, 24 Feb 2025 11:32:36 -0800
Subject: [PATCH 1/4] [Exegesis][RISCV] Add RVV support
TBA...
---
.../Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 37 +
.../RISCV/rvv/eligible-inst.test | 10 +
.../llvm-exegesis/RISCV/rvv/explicit-sew.test | 7 +
.../tools/llvm-exegesis/RISCV/rvv/filter.test | 6 +
.../llvm-exegesis/RISCV/rvv/reduction.test | 7 +
.../RISCV/rvv/self-aliasing.test | 6 +
.../llvm-exegesis/RISCV/rvv/skip-rm.test | 12 +
.../RISCV/rvv/valid-sew-zvk.test | 33 +
.../llvm-exegesis/RISCV/rvv/valid-sew.test | 41 ++
.../llvm-exegesis/RISCV/rvv/vlmax-only.test | 7 +
.../RISCV/rvv/vtype-rm-setup.test | 13 +
.../llvm-exegesis/lib/MCInstrDescView.cpp | 4 +
.../tools/llvm-exegesis/lib/MCInstrDescView.h | 4 +
.../llvm-exegesis/lib/RISCV/CMakeLists.txt | 2 +
.../lib/RISCV/RISCVExegesisPasses.h | 19 +
.../lib/RISCV/RISCVExegesisPostprocessing.cpp | 130 ++++
.../lib/RISCV/RISCVExegesisPreprocessing.cpp | 85 +++
llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp | 687 +++++++++++++++++-
.../lib/SerialSnippetGenerator.cpp | 2 +
llvm/tools/llvm-exegesis/lib/Target.cpp | 13 +
llvm/tools/llvm-exegesis/lib/Target.h | 3 +
llvm/tools/llvm-exegesis/llvm-exegesis.cpp | 15 +-
22 files changed, 1092 insertions(+), 51 deletions(-)
create mode 100644 llvm/test/tools/llvm-exegesis/RISCV/rvv/eligible-inst.test
create mode 100644 llvm/test/tools/llvm-exegesis/RISCV/rvv/explicit-sew.test
create mode 100644 llvm/test/tools/llvm-exegesis/RISCV/rvv/filter.test
create mode 100644 llvm/test/tools/llvm-exegesis/RISCV/rvv/reduction.test
create mode 100644 llvm/test/tools/llvm-exegesis/RISCV/rvv/self-aliasing.test
create mode 100644 llvm/test/tools/llvm-exegesis/RISCV/rvv/skip-rm.test
create mode 100644 llvm/test/tools/llvm-exegesis/RISCV/rvv/valid-sew-zvk.test
create mode 100644 llvm/test/tools/llvm-exegesis/RISCV/rvv/valid-sew.test
create mode 100644 llvm/test/tools/llvm-exegesis/RISCV/rvv/vlmax-only.test
create mode 100644 llvm/test/tools/llvm-exegesis/RISCV/rvv/vtype-rm-setup.test
create mode 100644 llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPasses.h
create mode 100644 llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp
create mode 100644 llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPreprocessing.cpp
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index 80ff18d914dca..135aec0c8135c 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -432,7 +432,44 @@ enum RoundingMode {
RNE = 1,
RDN = 2,
ROD = 3,
+ Invalid
};
+
+inline static StringRef roundingModeToString(RoundingMode RndMode) {
+ switch (RndMode) {
+ default:
+ llvm_unreachable("Unknown vector fixed-point rounding mode");
+ case RISCVVXRndMode::RNU:
+ return "rnu";
+ case RISCVVXRndMode::RNE:
+ return "rne";
+ case RISCVVXRndMode::RDN:
+ return "rdn";
+ case RISCVVXRndMode::ROD:
+ return "rod";
+ }
+}
+
+inline static RoundingMode stringToRoundingMode(StringRef Str) {
+ return StringSwitch<RoundingMode>(Str)
+ .Case("rnu", RISCVVXRndMode::RNU)
+ .Case("rne", RISCVVXRndMode::RNE)
+ .Case("rdn", RISCVVXRndMode::RDN)
+ .Case("rod", RISCVVXRndMode::ROD)
+ .Default(RISCVVXRndMode::Invalid);
+}
+
+inline static bool isValidRoundingMode(unsigned Mode) {
+ switch (Mode) {
+ default:
+ return false;
+ case RISCVVXRndMode::RNU:
+ case RISCVVXRndMode::RNE:
+ case RISCVVXRndMode::RDN:
+ case RISCVVXRndMode::ROD:
+ return true;
+ }
+}
} // namespace RISCVVXRndMode
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/eligible-inst.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/eligible-inst.test
new file mode 100644
index 0000000000000..189adf2c1b334
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/eligible-inst.test
@@ -0,0 +1,10 @@
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
+# RUN: --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 | FileCheck %s --allow-empty --check-prefix=LATENCY
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
+# RUN: --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 --min-instructions=100 | FileCheck %s --check-prefix=RTHROUGHPUT
+
+# LATENCY-NOT: PseudoVCOMPRESS_VM_M2_E8
+# LATENCY-NOT: PseudoVCPOP_M_B32
+
+# RTHROUGHPUT: PseudoVCOMPRESS_VM_M2_E8
+# RTHROUGHPUT: PseudoVCPOP_M_B32
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/explicit-sew.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/explicit-sew.test
new file mode 100644
index 0000000000000..476cf35818d6f
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/explicit-sew.test
@@ -0,0 +1,7 @@
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFWREDUSUM_VS_M1_E32 \
+# RUN: --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s
+
+# Make sure none of the configs has an SEW other than e32
+# CHECK: PseudoVFWREDUSUM_VS_M1_E32
+# CHECK: SEW: e32
+# CHECK-NOT: SEW: e{{(8|16|64)}}
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/filter.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/filter.test
new file mode 100644
index 0000000000000..e3a4336fdf670
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/filter.test
@@ -0,0 +1,6 @@
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput --opcode-name=PseudoVNCLIPU_WX_M1_MASK \
+# RUN: --riscv-filter-config='vtype = {VXRM: rod, AVL: VLMAX, SEW: e(8|16), Policy: ta/mu}' --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s
+
+# CHECK: config: 'vtype = {VXRM: rod, AVL: VLMAX, SEW: e8, Policy: ta/mu}'
+# CHECK: config: 'vtype = {VXRM: rod, AVL: VLMAX, SEW: e16, Policy: ta/mu}'
+# CHECK-NOT: config: 'vtype = {VXRM: rod, AVL: VLMAX, SEW: e(32|64), Policy: ta/mu}'
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/reduction.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/reduction.test
new file mode 100644
index 0000000000000..a637fa24af16b
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/reduction.test
@@ -0,0 +1,7 @@
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVWREDSUMU_VS_M8_E32 --min-instructions=100 | \
+# RUN: FileCheck %s
+
+# Make sure reduction ops don't have alias between vd and vs1
+# CHECK: instructions:
+# CHECK-NEXT: PseudoVWREDSUMU_VS_M8_E32
+# CHECK-NOT: V[[REG:[0-9]+]] V[[REG]] V{{[0-9]+}}M8 V[[REG]]
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/self-aliasing.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/self-aliasing.test
new file mode 100644
index 0000000000000..c950341716238
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/self-aliasing.test
@@ -0,0 +1,6 @@
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVXOR_VX_M4 --min-instructions=100 | \
+# RUN: FileCheck %s
+
+# Make sure all def / use operands are the same in latency mode.
+# CHECK: instructions:
+# CHECK-NEXT: PseudoVXOR_VX_M4 V[[REG:[0-9]+]]M4 V[[REG]]M4 V[[REG]]M4 X{{.*}}
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/skip-rm.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/skip-rm.test
new file mode 100644
index 0000000000000..a3af37149eeb5
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/skip-rm.test
@@ -0,0 +1,12 @@
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVAADDU_VV_M1 \
+# RUN: --riscv-enumerate-rounding-modes=false --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=VXRM
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFADD_VFPR16_M1_E16 \
+# RUN: --riscv-enumerate-rounding-modes=false --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=FRM
+
+# VXRM: PseudoVAADDU_VV_M1
+# VXRM: VXRM: rnu
+# VXRM-NOT: VXRM: {{(rne|rdn|rod)}}
+
+# FRM: PseudoVFADD_VFPR16_M1_E16
+# FRM: FRM: rne
+# FRM-NOT: FRM: {{(rtz|rdn|rup|rmm|dyn)}}
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/valid-sew-zvk.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/valid-sew-zvk.test
new file mode 100644
index 0000000000000..515d3397b57be
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/valid-sew-zvk.test
@@ -0,0 +1,33 @@
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
+# RUN: --opcode-name=PseudoVAESDF_VS_M1_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
+# RUN: FileCheck %s --check-prefix=ZVK
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
+# RUN: --opcode-name=PseudoVGHSH_VV_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
+# RUN: FileCheck %s --check-prefix=ZVK
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
+# RUN: --opcode-name=PseudoVSM4K_VI_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
+# RUN: FileCheck %s --check-prefix=ZVK
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
+# RUN: --opcode-name=PseudoVSM3C_VI_M2 --max-configs-per-opcode=1000 --min-instructions=100 | \
+# RUN: FileCheck %s --check-prefix=ZVK
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
+# RUN: --opcode-name=PseudoVSHA2MS_VV_M1_E32 --max-configs-per-opcode=1000 --min-instructions=100 | \
+# RUN: FileCheck %s --allow-empty --check-prefix=ZVKNH
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
+# RUN: --opcode-name=PseudoVSHA2MS_VV_M2_E64 --max-configs-per-opcode=1000 --min-instructions=100 | \
+# RUN: FileCheck %s --allow-empty --check-prefix=ZVKNH
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
+# RUN: --opcode-name=PseudoVSM3C_VI_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
+# RUN: FileCheck %s --allow-empty --check-prefix=EMPTY
+
+# Most vector crypto only supports SEW=32, except Zvknhb which also supports SEW=64
+# ZVK-NOT: SEW: e{{(8|16)}}
+# ZVK: SEW: e32
+# ZVK-NOT: SEW: e64
+
+# ZVKNH(A|B) can either have SEW=32 (EGW=128) or SEW=64 (EGW=256)
+
+# ZVKNH-NOT: SEW: e{{(8|16)}}
+# ZVKNH: SEW: e{{(32|64)}}
+
+# EMPTY-NOT: SEW: e{{(8|16|32|64)}}
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/valid-sew.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/valid-sew.test
new file mode 100644
index 0000000000000..b678300564529
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/valid-sew.test
@@ -0,0 +1,41 @@
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVMUL_VV_MF4_MASK \
+# RUN: --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=FRAC-LMUL
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
+# RUN: --opcode-name=PseudoVFADD_VFPR16_M1_E16,PseudoVFADD_VV_M2_E16,PseudoVFCLASS_V_MF2 --max-configs-per-opcode=1000 --min-instructions=100 | \
+# RUN: FileCheck %s --check-prefix=FP
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
+# RUN: --opcode-name=PseudoVSEXT_VF8_M2,PseudoVZEXT_VF8_M2 --max-configs-per-opcode=1000 --min-instructions=100 | \
+# RUN: FileCheck %s --check-prefix=VEXT
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 -benchmark-phase=assemble-measured-code --mode=latency \
+# RUN: --opcode-name=PseudoVFREDUSUM_VS_M1_E16 --max-configs-per-opcode=1000 --min-instructions=100 | \
+# RUN: FileCheck %s --check-prefix=VFRED --allow-empty
+
+# Make sure only the supported SEWs are generated for fractional LMUL.
+# FRAC-LMUL: PseudoVMUL_VV_MF4_MASK
+# FRAC-LMUL: SEW: e8
+# FRAC-LMUL: SEW: e16
+# FRAC-LMUL-NOT: SEW: e{{(32|64)}}
+
+# Make sure only SEWs that are equal to the supported FLEN are generated
+# FP: PseudoVFADD_VFPR16_M1_E16
+# FP-NOT: SEW: e8
+# FP: PseudoVFADD_VV_M2_E16
+# FP-NOT: SEW: e8
+# FP: PseudoVFCLASS_V_MF2
+# FP-NOT: SEW: e8
+
+# VS/ZEXT can only operate on SEW that will not lead to invalid EEW on the
+# source operand.
+# VEXT: PseudoVSEXT_VF8_M2
+# VEXT-NOT: SEW: e8
+# VEXT-NOT: SEW: e16
+# VEXT-NOT: SEW: e32
+# VEXT: SEW: e64
+# VEXT: PseudoVZEXT_VF8_M2
+# VEXT-NOT: SEW: e8
+# VEXT-NOT: SEW: e16
+# VEXT-NOT: SEW: e32
+# VEXT: SEW: e64
+
+# P470 doesn't have Zvfh so 16-bit vfredusum shouldn't exist
+# VFRED-NOT: PseudoVFREDUSUM_VS_M1_E16
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/vlmax-only.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/vlmax-only.test
new file mode 100644
index 0000000000000..30897b6e13735
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/vlmax-only.test
@@ -0,0 +1,7 @@
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFWREDUSUM_VS_M1_E32 \
+# RUN: --riscv-vlmax-for-vl --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s
+
+# Only allow VLMAX for AVL when -riscv-vlmax-for-vl is present
+# CHECK: PseudoVFWREDUSUM_VS_M1_E32
+# CHECK: AVL: VLMAX
+# CHECK-NOT: AVL: {{(simm5|<MCOperand: .*>)}}
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/vtype-rm-setup.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/vtype-rm-setup.test
new file mode 100644
index 0000000000000..c41b357c13821
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/vtype-rm-setup.test
@@ -0,0 +1,13 @@
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFWREDUSUM_VS_M1_E32 \
+# RUN: --max-configs-per-opcode=1 --min-instructions=100 --dump-object-to-disk=%t.o > %t.txt
+# RUN: llvm-objdump --triple=riscv64 -d %t.o | FileCheck %s --check-prefix=VFWREDUSUM
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVSSRL_VX_MF4 \
+# RUN: --max-configs-per-opcode=1 --min-instructions=100 --dump-object-to-disk=%t.o > %t.txt
+# RUN: llvm-objdump --triple=riscv64 -d %t.o | FileCheck %s --check-prefix=VSSRL
+
+# Make sure the correct VSETVL / VXRM write / FRM write instructions are generated
+# VFWREDUSUM: vsetvli {{.*}}, zero, e32, m1, tu, ma
+# VFWREDUSUM: fsrmi {{.*}}, 0x0
+
+# VSSRL: vsetvli {{.*}}, zero, e8, mf4, tu, ma
+# VSSRL: csrwi vxrm, 0x0
diff --git a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
index c002f68b427f7..e0e796cee8040 100644
--- a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
+++ b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
@@ -50,6 +50,8 @@ bool Operand::isTied() const { return TiedToIndex.has_value(); }
bool Operand::isVariable() const { return VariableIndex.has_value(); }
+bool Operand::isEarlyClobber() const { return IsEarlyClobber; }
+
bool Operand::isMemory() const {
return isExplicit() &&
getExplicitOperandInfo().OperandType == MCOI::OPERAND_MEMORY;
@@ -115,6 +117,8 @@ Instruction::create(const MCInstrInfo &InstrInfo,
Operand Operand;
Operand.Index = OpIndex;
Operand.IsDef = (OpIndex < Description->getNumDefs());
+ Operand.IsEarlyClobber =
+ (Description->getOperandConstraint(OpIndex, MCOI::EARLY_CLOBBER) != -1);
// TODO(gchatelet): Handle isLookupPtrRegClass.
if (OpInfo.RegClass >= 0)
Operand.Tracker = &RATC.getRegisterClass(OpInfo.RegClass);
diff --git a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
index c1af10fa460a3..0a62967897c79 100644
--- a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
+++ b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
@@ -72,6 +72,7 @@ struct Operand {
bool isVariable() const;
bool isMemory() const;
bool isImmediate() const;
+ bool isEarlyClobber() const;
unsigned getIndex() const;
unsigned getTiedToIndex() const;
unsigned getVariableIndex() const;
@@ -82,6 +83,7 @@ struct Operand {
// Please use the accessors above and not the following fields.
std::optional<uint8_t> Index;
bool IsDef = false;
+ bool IsEarlyClobber = false;
const RegisterAliasingTracker *Tracker = nullptr; // Set for Register Op.
const MCOperandInfo *Info = nullptr; // Set for Explicit Op.
std::optional<uint8_t> TiedToIndex; // Set for Reg&Explicit Op.
@@ -115,6 +117,8 @@ struct Instruction {
Instruction &operator=(const Instruction &) = delete;
Instruction &operator=(Instruction &&) = delete;
+ unsigned getOpcode() const { return Description.getOpcode(); }
+
// Returns the Operand linked to this Variable.
// In case the Variable is tied, the primary (i.e. Def) Operand is returned.
const Operand &getPrimaryOperand(const Variable &Var) const;
diff --git a/llvm/tools/llvm-exegesis/lib/RISCV/CMakeLists.txt b/llvm/tools/llvm-exegesis/lib/RISCV/CMakeLists.txt
index 489ac6d6e34b3..d379874fa1d0e 100644
--- a/llvm/tools/llvm-exegesis/lib/RISCV/CMakeLists.txt
+++ b/llvm/tools/llvm-exegesis/lib/RISCV/CMakeLists.txt
@@ -14,6 +14,8 @@ set(LLVM_LINK_COMPONENTS
add_llvm_library(LLVMExegesisRISCV
DISABLE_LLVM_LINK_LLVM_DYLIB
STATIC
+ RISCVExegesisPreprocessing.cpp
+ RISCVExegesisPostprocessing.cpp
Target.cpp
DEPENDS
diff --git a/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPasses.h b/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPasses.h
new file mode 100644
index 0000000000000..f206966331756
--- /dev/null
+++ b/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPasses.h
@@ -0,0 +1,19 @@
+//===- RISCVExegesisPasses.h - RISC-V specific Exegesis Passes --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_EXEGESIS_LIB_RISCV_RISCVEXEGESISPASSES_H
+#define LLVM_TOOLS_EXEGESIS_LIB_RISCV_RISCVEXEGESISPASSES_H
+namespace llvm {
+class FunctionPass;
+
+namespace exegesis {
+FunctionPass *createRISCVPreprocessingPass();
+FunctionPass *createRISCVPostprocessingPass();
+} // namespace exegesis
+} // namespace llvm
+#endif
diff --git a/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp b/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp
new file mode 100644
index 0000000000000..e25cf04a01d9e
--- /dev/null
+++ b/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp
@@ -0,0 +1,130 @@
+//===- RISCVExegesisPostprocessing.cpp - Post processing MI for exegesis---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// \file
+// Post-processing for exegesis: assign physical registers to the virtual
+// register operands of VSETVL(I) (both the AVL and the VL result, i.e. rd)
+// and to the virtual register operand of FRM writes (WriteFRM / SwapFRMImm).
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVExegesisPasses.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-exegesis-post-processing"
+
+namespace {
+struct RISCVExegesisPostprocessing : public MachineFunctionPass {
+ static char ID;
+
+ RISCVExegesisPostprocessing() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ // Extremely simple register allocator that picks a register that hasn't
+ // been defined or used in this function.
+ Register allocateGPRRegister(const MachineFunction &MF,
+ const MachineRegisterInfo &MRI);
+
+ bool processVSETVL(MachineInstr &MI, MachineRegisterInfo &MRI);
+ bool processWriteFRM(MachineInstr &MI, MachineRegisterInfo &MRI);
+};
+} // anonymous namespace
+
+char RISCVExegesisPostprocessing::ID = 0;
+
+bool RISCVExegesisPostprocessing::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+ for (auto &MBB : MF)
+ for (auto &MI : MBB) {
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ case RISCV::VSETVLI:
+ case RISCV::VSETVL:
+ case RISCV::PseudoVSETVLI:
+ case RISCV::PseudoVSETVLIX0:
+ Changed |= processVSETVL(MI, MF.getRegInfo());
+ break;
+ case RISCV::SwapFRMImm:
+ case RISCV::WriteFRM:
+ Changed |= processWriteFRM(MI, MF.getRegInfo());
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (Changed)
+ MF.getRegInfo().clearVirtRegs();
+
+ LLVM_DEBUG(MF.print(dbgs() << "===After RISCVExegesisPostprocessing===\n");
+ dbgs() << "\n");
+
+ return Changed;
+}
+
+Register RISCVExegesisPostprocessing::allocateGPRRegister(
+ const MachineFunction &MF, const MachineRegisterInfo &MRI) {
+ const auto &TRI = *MRI.getTargetRegisterInfo();
+
+ const TargetRegisterClass *GPRClass =
+ TRI.getRegClass(RISCV::GPRJALRRegClassID);
+ BitVector Candidates = TRI.getAllocatableSet(MF, GPRClass);
+
+ for (unsigned SetIdx : Candidates.set_bits()) {
+ if (MRI.reg_empty(Register(SetIdx)))
+ return Register(SetIdx);
+ }
+
+ // All bets are off, assign a fixed one.
+ return RISCV::X5;
+}
+
+bool RISCVExegesisPostprocessing::processVSETVL(MachineInstr &MI,
+ MachineRegisterInfo &MRI) {
+ bool Changed = false;
+ // Replace both AVL and VL (i.e. the result) operands with physical
+ // registers.
+ for (unsigned Idx = 0U; Idx < 2; ++Idx)
+ if (MI.getOperand(Idx).isReg()) {
+ Register RegOp = MI.getOperand(Idx).getReg();
+ if (RegOp.isVirtual()) {
+ MRI.replaceRegWith(RegOp, allocateGPRRegister(*MI.getMF(), MRI));
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+bool RISCVExegesisPostprocessing::processWriteFRM(MachineInstr &MI,
+ MachineRegisterInfo &MRI) {
+ // The virtual register will be the first operand in both SwapFRMImm and
+ // WriteFRM.
+ if (MI.getOperand(0).isReg()) {
+ Register DestReg = MI.getOperand(0).getReg();
+ if (DestReg.isVirtual()) {
+ MRI.replaceRegWith(DestReg, allocateGPRRegister(*MI.getMF(), MRI));
+ return true;
+ }
+ }
+ return false;
+}
+
+FunctionPass *llvm::exegesis::createRISCVPostprocessingPass() {
+ return new RISCVExegesisPostprocessing();
+}
diff --git a/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPreprocessing.cpp b/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPreprocessing.cpp
new file mode 100644
index 0000000000000..7f1cfd9ea52df
--- /dev/null
+++ b/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPreprocessing.cpp
@@ -0,0 +1,85 @@
+//===- RISCVExegesisPreprocessing.cpp -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
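+// \file
+// Pre-processing for exegesis: for every instruction that has a VL operand
+// held in a physical register, replace that register with a new virtual one.
+//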
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVExegesisPasses.h"
+#include "RISCVRegisterInfo.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-exegesis-preprocessing"
+
+namespace {
+struct RISCVExegesisPreprocessing : public MachineFunctionPass {
+ static char ID;
+
+ RISCVExegesisPreprocessing() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+} // anonymous namespace
+
+char RISCVExegesisPreprocessing::ID = 0;
+
+static bool processAVLOperand(MachineInstr &MI, MachineRegisterInfo &MRI,
+ const TargetInstrInfo &TII) {
+ const MCInstrDesc &Desc = TII.get(MI.getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+ if (!RISCVII::hasVLOp(TSFlags))
+ return false;
+
+ const MachineOperand &VLOp = MI.getOperand(RISCVII::getVLOpNum(Desc));
+ if (VLOp.isReg()) {
+ Register VLReg = VLOp.getReg();
+ if (VLReg.isVirtual())
+ return false;
+ assert(RISCV::GPRRegClass.contains(VLReg));
+ // Replace all uses of the original physical register with a new virtual
+ // register. The only reason we can do such replacement here is because it's
+ // almost certain that VLReg only has a single definition.
+ Register NewVLReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ MRI.replaceRegWith(VLReg, NewVLReg);
+ return true;
+ }
+
+ return false;
+}
+
+// Runs processAVLOperand on every instruction in \p MF. Returns false without
+// touching anything when the subtarget has no vector instructions; otherwise
+// returns true if at least one instruction was changed.
+bool RISCVExegesisPreprocessing::runOnMachineFunction(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const auto &STI = MF.getSubtarget<RISCVSubtarget>();
+ if (!STI.hasVInstructions())
+ return false;
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+
+ // Dump the function as it looks before any operand is rewritten.
+ LLVM_DEBUG(MF.print(dbgs() << "===Before RISCVExegesisPreprocessing===\n");
+ dbgs() << "\n");
+
+ bool Changed = false;
+ for (auto &MBB : MF)
+ for (auto &MI : MBB) {
+ Changed |= processAVLOperand(MI, MRI, TII);
+ }
+
+ return Changed;
+}
+
+FunctionPass *llvm::exegesis::createRISCVPreprocessingPass() {
+ return new RISCVExegesisPreprocessing();
+}
diff --git a/llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp b/llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp
index d70f609c5e080..c66ca6b86fb86 100644
--- a/llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp
@@ -7,11 +7,19 @@
//===----------------------------------------------------------------------===//
#include "../Target.h"
+#include "../ParallelSnippetGenerator.h"
+#include "../SerialSnippetGenerator.h"
+#include "../SnippetGenerator.h"
#include "MCTargetDesc/RISCVBaseInfo.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "MCTargetDesc/RISCVMatInt.h"
+#include "RISCV.h"
+#include "RISCVExegesisPasses.h"
#include "RISCVInstrInfo.h"
+#include "RISCVRegisterInfo.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
// include computeAvailableFeatures and computeRequiredFeatures.
#define GET_AVAILABLE_OPCODE_CHECKER
@@ -24,10 +32,584 @@
namespace llvm {
namespace exegesis {
+static cl::opt<bool>
+ OnlyUsesVLMAXForVL("riscv-vlmax-for-vl",
+ cl::desc("Only enumerate VLMAX for VL operand"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+ EnumerateRoundingModes("riscv-enumerate-rounding-modes",
+ cl::desc("Enumerate different FRM and VXRM"),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<std::string>
+ FilterConfig("riscv-filter-config",
+ cl::desc("Show only the configs matching this regex"),
+ cl::init(""), cl::Hidden);
+
#include "RISCVGenExegesis.inc"
namespace {
+/// Snippet generator that wraps the generic generator \p BaseT. For RVV
+/// pseudos, generateCodeTemplates() expands every base code template into
+/// one template per combination of SEW, policy, AVL and rounding mode (see
+/// annotateWithVType()); other opcodes are forwarded to \p BaseT unchanged.
+template <class BaseT> class RISCVSnippetGenerator : public BaseT {
+  /// Print the name of rounding mode \p Val to \p OS. \p Val is interpreted
+  /// as a VXRM value when \p UsesVXRM is true and as an FRM value otherwise.
+  static void printRoundingMode(raw_ostream &OS, unsigned Val, bool UsesVXRM) {
+    if (UsesVXRM) {
+      assert(RISCVVXRndMode::isValidRoundingMode(Val));
+      OS << RISCVVXRndMode::roundingModeToString(
+          static_cast<RISCVVXRndMode::RoundingMode>(Val));
+    } else {
+      assert(RISCVFPRndMode::isValidRoundingMode(Val));
+      OS << RISCVFPRndMode::roundingModeToString(
+          static_cast<RISCVFPRndMode::RoundingMode>(Val));
+    }
+  }
+
+  static constexpr unsigned MinSEW = 8;
+  // ELEN is basically SEW_max.
+  unsigned ELEN = 64;
+
+  // We can't know the real min/max VLEN w/o a Function, so we're
+  // using the VLen from Zvl.
+  unsigned ZvlVLen = 32;
+
+  /// Mask for registers that are NOT standalone registers like X0 and V0
+  BitVector AggregateRegisters;
+
+  /// Returns true when \p Opcode requires at least one of the features in
+  /// \p FBs, i.e. when the opcode is available in any of the FBs.
+  static bool
+  isOpcodeAvailableIn(unsigned Opcode,
+                      ArrayRef<RISCV_MC::SubtargetFeatureBits> FBs) {
+    FeatureBitset RequiredFeatures = RISCV_MC::computeRequiredFeatures(Opcode);
+    // Iterate with the enum's own type. Narrowing to uint8_t would silently
+    // truncate the feature index if the generated enum ever needs a wider
+    // underlying type.
+    for (RISCV_MC::SubtargetFeatureBits FB : FBs) {
+      if (RequiredFeatures[FB])
+        return true;
+    }
+    return false;
+  }
+
+  /// Returns true when \p Opcode requires Feature_HasVInstructionsAnyFBit.
+  static bool isRVVFloatingPointOp(unsigned Opcode) {
+    return isOpcodeAvailableIn(Opcode,
+                               {RISCV_MC::Feature_HasVInstructionsAnyFBit});
+  }
+
+  /// Get the element group width (EGW) of each vector crypto extension.
+  /// \p SEW only matters for Zvknh[ab]. \p Opcode must belong to one of the
+  /// extensions handled below, otherwise this hits llvm_unreachable.
+  static unsigned getZvkEGWSize(unsigned Opcode, unsigned SEW) {
+    using namespace RISCV_MC;
+    if (isOpcodeAvailableIn(Opcode, {Feature_HasStdExtZvkgBit,
+                                     Feature_HasStdExtZvknedBit,
+                                     Feature_HasStdExtZvksedBit}))
+      return 128U;
+    if (isOpcodeAvailableIn(Opcode, {Feature_HasStdExtZvkshBit}))
+      return 256U;
+    if (isOpcodeAvailableIn(Opcode, {Feature_HasStdExtZvknhaOrZvknhbBit}))
+      // In Zvknh[ab], when SEW=64 is used (i.e. Zvknhb), EGW is 256.
+      // Otherwise it's 128.
+      return SEW == 64 ? 256U : 128U;
+
+    llvm_unreachable("Unsupported opcode");
+  }
+
+  /// A handy utility to scale an integer by LMUL: \p Val is divided for a
+  /// fractional LMUL and multiplied otherwise.
+  template <typename T> static T multiplyLMul(T Val, RISCVVType::VLMUL VLMul) {
+    auto [LMul, IsFractional] = RISCVVType::decodeVLMUL(VLMul);
+    return IsFractional ? Val / LMul : Val * LMul;
+  }
+
+  /// Return the denominator of the fraction (i.e. the `x` in the .vfx suffix)
+  /// or nullopt if BaseOpcode is not a vector sext/zext.
+  static std::optional<unsigned> isRVVSignZeroExtend(unsigned BaseOpcode) {
+    switch (BaseOpcode) {
+    case RISCV::VSEXT_VF2:
+    case RISCV::VZEXT_VF2:
+      return 2;
+    case RISCV::VSEXT_VF4:
+    case RISCV::VZEXT_VF4:
+      return 4;
+    case RISCV::VSEXT_VF8:
+    case RISCV::VZEXT_VF8:
+      return 8;
+    default:
+      return std::nullopt;
+    }
+  }
+
+  /// Append to \p Result one copy of \p CT per supported combination of SEW,
+  /// policy, AVL and rounding mode of the RVV pseudo \p Instr.
+  void annotateWithVType(const CodeTemplate &CT, const Instruction &Instr,
+                         unsigned BaseOpcode,
+                         const BitVector &ForbiddenRegisters,
+                         std::vector<CodeTemplate> &Result) const;
+
+public:
+  RISCVSnippetGenerator(const LLVMState &State,
+                        const SnippetGenerator::Options &Opts)
+      : BaseT(State, Opts),
+        AggregateRegisters(State.getRegInfo().getNumRegs(), /*initVal=*/true) {
+    // Initialize standalone registers mask: every register starts out as
+    // "aggregate" and the members of the standalone classes are cleared.
+    const MCRegisterInfo &RegInfo = State.getRegInfo();
+    const unsigned StandaloneRegClasses[] = {
+        RISCV::GPRRegClassID, RISCV::FPR16RegClassID, RISCV::VRRegClassID};
+
+    for (unsigned RegClassID : StandaloneRegClasses)
+      for (unsigned Reg : RegInfo.getRegClass(RegClassID)) {
+        AggregateRegisters.reset(Reg);
+      }
+
+    // Initialize ELEN and VLEN.
+    // FIXME: We could have obtained these two from RISCVSubtarget
+    // but in order to get that from TargetMachine, we need a Function.
+    const Triple &TT = State.getTargetMachine().getTargetTriple();
+    ELEN = TT.isRISCV32() ? 32 : 64;
+
+    // Probe +zvl32b, +zvl64b, ... and keep the largest enabled size.
+    const MCSubtargetInfo &STI = State.getSubtargetInfo();
+    std::string ZvlQuery;
+    for (unsigned I = 5U, Size = (1 << I); I < 17U; ++I, Size <<= 1) {
+      ZvlQuery = "+zvl";
+      raw_string_ostream SS(ZvlQuery);
+      SS << Size << "b";
+      if (STI.checkFeatures(SS.str()) && ZvlVLen < Size)
+        ZvlVLen = Size;
+    }
+  }
+
+  Expected<std::vector<CodeTemplate>>
+  generateCodeTemplates(InstructionTemplate Variant,
+                        const BitVector &ForbiddenRegisters) const override;
+};
+
+// Returns true when MaskedOp has masked-pseudo info whose unmasked
+// counterpart is UnmaskedOp.
+static bool isMaskedSibiling(unsigned MaskedOp, unsigned UnmaskedOp) {
+  if (const auto *MaskedInfo = RISCV::getMaskedPseudoInfo(MaskedOp))
+    return MaskedInfo->UnmaskedPseudo == UnmaskedOp;
+  return false;
+}
+
+// There are primarily two kinds of opcodes that are not eligible
+// in a serial snippet:
+// (1) Opcodes with only a single use operand, which cannot overlap with
+//     the def operand.
+// (2) Opcodes whose only use operand lives in a different register file
+//     than the def operand. For instance, the use operand is a vector and
+//     the result is a scalar.
+// Any instruction with an early-clobber operand is rejected as well.
+static bool isIneligibleOfSerialSnippets(unsigned BaseOpcode,
+                                         const Instruction &I) {
+  for (const Operand &Op : I.Operands)
+    if (Op.isEarlyClobber())
+      return true;
+
+  bool Ineligible = false;
+  switch (BaseOpcode) {
+  case RISCV::VCOMPRESS_VM:
+  case RISCV::VCPOP_M:
+  case RISCV::VCPOP_V:
+  case RISCV::VRGATHEREI16_VV:
+  case RISCV::VRGATHER_VI:
+  case RISCV::VRGATHER_VV:
+  case RISCV::VRGATHER_VX:
+  case RISCV::VSLIDE1UP_VX:
+  case RISCV::VSLIDEUP_VI:
+  case RISCV::VSLIDEUP_VX:
+  // The truncate instructions that arrive here are those that cannot
+  // have any overlap between source and dest at all (i.e.
+  // those that don't satisfy conditions 2 and 3 in RVV spec
+  // 5.2).
+  case RISCV::VNCLIPU_WI:
+  case RISCV::VNCLIPU_WV:
+  case RISCV::VNCLIPU_WX:
+  case RISCV::VNCLIP_WI:
+  case RISCV::VNCLIP_WV:
+  case RISCV::VNCLIP_WX:
+    Ineligible = true;
+    break;
+  default:
+    break;
+  }
+  return Ineligible;
+}
+
+// Returns true for the FP widening/narrowing conversion opcodes (regular and
+// BF16 flavors) listed below.
+static bool isZvfhminZvfbfminOpcodes(unsigned BaseOpcode) {
+  return BaseOpcode == RISCV::VFNCVT_F_F_W ||
+         BaseOpcode == RISCV::VFWCVT_F_F_V ||
+         BaseOpcode == RISCV::VFNCVTBF16_F_F_W ||
+         BaseOpcode == RISCV::VFWCVTBF16_F_F_V;
+}
+
+// Returns true when BaseOpcode is one of the integer or floating point
+// vector reduction opcodes listed below.
+static bool isVectorReduction(unsigned BaseOpcode) {
+  static constexpr unsigned ReductionOpcodes[] = {
+      // Integer reductions.
+      RISCV::VREDAND_VS,   RISCV::VREDMAXU_VS,  RISCV::VREDMAX_VS,
+      RISCV::VREDMINU_VS,  RISCV::VREDMIN_VS,   RISCV::VREDOR_VS,
+      RISCV::VREDSUM_VS,   RISCV::VREDXOR_VS,   RISCV::VWREDSUMU_VS,
+      RISCV::VWREDSUM_VS,
+      // Floating point reductions.
+      RISCV::VFREDMAX_VS,  RISCV::VFREDMIN_VS,  RISCV::VFREDOSUM_VS,
+      RISCV::VFREDUSUM_VS};
+
+  for (unsigned Candidate : ReductionOpcodes)
+    if (Candidate == BaseOpcode)
+      return true;
+  return false;
+}
+
+/// Expand \p OrigCT into one CodeTemplate per supported combination of
+/// rounding mode, AVL, SEW and policy of the RVV pseudo \p Instr, and append
+/// the results to \p Result.
+///
+/// \param OrigCT             Base template to clone for every configuration.
+/// \param Instr              The RVV pseudo instruction being benchmarked.
+/// \param BaseOpcode         The MC opcode corresponding to \p Instr.
+/// \param ForbiddenRegisters Registers that must not be picked when creating
+///                           self-aliasing operands (serial snippets only).
+/// \param Result             Output list. At most
+///                           Opts.MaxConfigsPerOpcode templates are appended
+///                           per call. Nothing is appended when no SEW
+///                           candidate survives the filtering.
+template <class BaseT>
+void RISCVSnippetGenerator<BaseT>::annotateWithVType(
+    const CodeTemplate &OrigCT, const Instruction &Instr, unsigned BaseOpcode,
+    const BitVector &ForbiddenRegisters,
+    std::vector<CodeTemplate> &Result) const {
+  const MCSubtargetInfo &STI = SnippetGenerator::State.getSubtargetInfo();
+  unsigned VPseudoOpcode = Instr.getOpcode();
+
+  bool IsSerial = std::is_same_v<BaseT, SerialSnippetGenerator>;
+
+  const MCInstrDesc &MIDesc = Instr.Description;
+  const uint64_t TSFlags = MIDesc.TSFlags;
+
+  RISCVVType::VLMUL VLMul = RISCVII::getLMul(TSFlags);
+
+  const size_t StartingResultSize = Result.size();
+
+  SmallPtrSet<const Operand *, 4> VTypeOperands;
+  std::optional<AliasingConfigurations> SelfAliasing;
+  // Exegesis sees instructions with tied operands as being inherently serial.
+  // But for RVV instructions, those tied operands are passthru rather
+  // than real read operands. So we manually put a dependency between the
+  // destination (i.e. def) and any of the non-tied/SEW/policy/AVL/RM
+  // operands.
+  auto assignSerialRVVOperands = [&, this](InstructionTemplate &IT) {
+    // Initialize SelfAliasing on first use.
+    if (!SelfAliasing.has_value()) {
+      BitVector ExcludeRegs = ForbiddenRegisters;
+      ExcludeRegs |= AggregateRegisters;
+      SelfAliasing = AliasingConfigurations(Instr, Instr, ExcludeRegs);
+      bool EmptyUses = false;
+      for (auto &ARO : SelfAliasing->Configurations) {
+        auto &Uses = ARO.Uses;
+        for (auto ROA = Uses.begin(); ROA != Uses.end();) {
+          const Operand *Op = ROA->Op;
+          // Exclude tied operand(s).
+          if (Op->isTied()) {
+            ROA = Uses.erase(ROA);
+            continue;
+          }
+
+          // Special handling for reduction operations: for a given reduction
+          // `vredop vd, vs2, vs1`, we don't want vd to be aliased with vs1
+          // since we're only reading `vs1[0]` and many implementations
+          // optimize for this case (e.g. chaining). Instead, we're forcing
+          // it to create alias between vd and vs2.
+          if (isVectorReduction(BaseOpcode) &&
+              // vs1's operand index is always 3.
+              Op->getIndex() == 3) {
+            ROA = Uses.erase(ROA);
+            continue;
+          }
+
+          // Exclude any special operands like SEW and VL -- we've already
+          // assigned values to them.
+          if (VTypeOperands.count(Op)) {
+            ROA = Uses.erase(ROA);
+            continue;
+          }
+          ++ROA;
+        }
+
+        // If any of the use operand candidate lists is empty, there is
+        // no point to assign self aliasing registers.
+        if (Uses.empty()) {
+          EmptyUses = true;
+          break;
+        }
+      }
+      if (EmptyUses)
+        SelfAliasing->Configurations.clear();
+    }
+
+    // This is a self aliasing instruction so defs and uses are from the same
+    // instance, hence twice IT in the following call.
+    if (!SelfAliasing->empty() && !SelfAliasing->hasImplicitAliasing())
+      setRandomAliasing(*SelfAliasing, IT, IT);
+  };
+
+  // We are going to create a CodeTemplate (configuration) for each supported
+  // SEW, policy, and VL.
+  // FIXME: Account for EEW and EMUL.
+  SmallVector<std::optional<unsigned>, 4> Log2SEWs;
+  SmallVector<std::optional<unsigned>, 4> Policies;
+  SmallVector<std::optional<int>, 3> AVLs;
+  SmallVector<std::optional<unsigned>, 8> RoundingModes;
+
+  bool HasSEWOp = RISCVII::hasSEWOp(TSFlags);
+  bool HasPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
+  bool HasVLOp = RISCVII::hasVLOp(TSFlags);
+  bool HasRMOp = RISCVII::hasRoundModeOp(TSFlags);
+  bool UsesVXRM = RISCVII::usesVXRM(TSFlags);
+
+  if (HasSEWOp) {
+    const Operand &SEWOp = Instr.Operands[RISCVII::getSEWOpNum(MIDesc)];
+    VTypeOperands.insert(&SEWOp);
+
+    if (SEWOp.Info->OperandType == RISCVOp::OPERAND_SEW_MASK) {
+      // If it's a mask-producing instruction, the SEW operand is always zero.
+      Log2SEWs.push_back(0);
+    } else {
+      SmallVector<unsigned, 4> SEWCandidates;
+
+      // (RVV spec 3.4.2) For fractional LMUL, the supported SEW are between
+      // [SEW_min, LMUL * ELEN].
+      unsigned SEWUpperBound =
+          VLMul >= RISCVVType::LMUL_F8 ? multiplyLMul(ELEN, VLMul) : ELEN;
+      for (unsigned SEW = MinSEW; SEW <= SEWUpperBound; SEW <<= 1) {
+        SEWCandidates.push_back(SEW);
+
+        // Some scheduling classes already integrate SEW; only put
+        // their corresponding SEW values at the SEW operands.
+        // NOTE: It is imperative to put this condition in the front, otherwise
+        // it is tricky and difficult to know if there is an integrated
+        // SEW after other rules are applied to filter the candidates.
+        const auto *RVVBase =
+            RISCVVInversePseudosTable::getBaseInfo(BaseOpcode, VLMul, SEW);
+        if (RVVBase && (RVVBase->Pseudo == VPseudoOpcode ||
+                        isMaskedSibiling(VPseudoOpcode, RVVBase->Pseudo) ||
+                        isMaskedSibiling(RVVBase->Pseudo, VPseudoOpcode))) {
+          // There is an integrated SEW, remove all but the SEW pushed last.
+          SEWCandidates.erase(SEWCandidates.begin(), SEWCandidates.end() - 1);
+          break;
+        }
+      }
+
+      // Filter out some candidates.
+      for (auto SEW = SEWCandidates.begin(); SEW != SEWCandidates.end();) {
+        // For floating point operations, only select SEW of the supported FLEN.
+        if (isRVVFloatingPointOp(VPseudoOpcode)) {
+          bool Supported = false;
+          Supported |= isZvfhminZvfbfminOpcodes(BaseOpcode) && *SEW == 16;
+          Supported |= STI.hasFeature(RISCV::FeatureStdExtZvfh) && *SEW == 16;
+          Supported |= STI.hasFeature(RISCV::FeatureStdExtF) && *SEW == 32;
+          Supported |= STI.hasFeature(RISCV::FeatureStdExtD) && *SEW == 64;
+          if (!Supported) {
+            SEW = SEWCandidates.erase(SEW);
+            continue;
+          }
+        }
+
+        // The EEW for the source operand in VSEXT and VZEXT is a fraction
+        // of the SEW, hence only SEWs that will lead to valid EEW are allowed.
+        if (auto Frac = isRVVSignZeroExtend(BaseOpcode))
+          if (*SEW / *Frac < MinSEW) {
+            SEW = SEWCandidates.erase(SEW);
+            continue;
+          }
+
+        // Most vector crypto 1.0 instructions only work on SEW=32.
+        using namespace RISCV_MC;
+        if (isOpcodeAvailableIn(BaseOpcode, {Feature_HasStdExtZvkgBit,
+                                             Feature_HasStdExtZvknedBit,
+                                             Feature_HasStdExtZvknhaOrZvknhbBit,
+                                             Feature_HasStdExtZvksedBit,
+                                             Feature_HasStdExtZvkshBit})) {
+          if (*SEW != 32)
+            // Zvknhb support SEW=64 as well.
+            if (*SEW != 64 || !STI.hasFeature(RISCV::FeatureStdExtZvknhb) ||
+                !isOpcodeAvailableIn(BaseOpcode,
+                                     {Feature_HasStdExtZvknhaOrZvknhbBit})) {
+              SEW = SEWCandidates.erase(SEW);
+              continue;
+            }
+
+          // We're also enforcing the requirement of `LMUL * VLEN >= EGW` here,
+          // because some of the extensions have SEW-dependent EGW.
+          unsigned EGW = getZvkEGWSize(BaseOpcode, *SEW);
+          if (multiplyLMul(ZvlVLen, VLMul) < EGW) {
+            SEW = SEWCandidates.erase(SEW);
+            continue;
+          }
+        }
+
+        ++SEW;
+      }
+
+      // We're not going to produce any result with zero SEW candidate.
+      if (SEWCandidates.empty())
+        return;
+
+      for (unsigned SEW : SEWCandidates)
+        Log2SEWs.push_back(Log2_32(SEW));
+    }
+  } else {
+    Log2SEWs.push_back(std::nullopt);
+  }
+
+  if (HasPolicyOp) {
+    VTypeOperands.insert(&Instr.Operands[RISCVII::getVecPolicyOpNum(MIDesc)]);
+
+    Policies = {0, RISCVVType::TAIL_AGNOSTIC, RISCVVType::MASK_AGNOSTIC,
+                (RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC)};
+  } else {
+    Policies.push_back(std::nullopt);
+  }
+
+  if (HasVLOp) {
+    VTypeOperands.insert(&Instr.Operands[RISCVII::getVLOpNum(MIDesc)]);
+
+    if (OnlyUsesVLMAXForVL)
+      AVLs.push_back(-1);
+    else
+      AVLs = {// 5-bit immediate value
+              1,
+              // VLMAX
+              -1,
+              // Non-X0 register
+              0};
+  } else {
+    AVLs.push_back(std::nullopt);
+  }
+
+  if (HasRMOp) {
+    // The rounding mode operand sits right before the VL operand.
+    VTypeOperands.insert(&Instr.Operands[RISCVII::getVLOpNum(MIDesc) - 1]);
+
+    // If we're not enumerating all rounding modes,
+    // use zero (rne in FRM and rnu in VXRM) as the default
+    // mode.
+    RoundingModes = {0U};
+    if (EnumerateRoundingModes) {
+      RoundingModes.append({1, 2, 3});
+      if (!UsesVXRM)
+        // FRM values 5 and 6 are currently reserved.
+        RoundingModes.append({4, 7});
+    }
+  } else {
+    RoundingModes = {std::nullopt};
+  }
+
+  // The set orders (and de-duplicates) the configurations by
+  // (RM, AVL, Log2SEW, Policy).
+  std::set<std::tuple<std::optional<unsigned>, std::optional<int>,
+                      std::optional<unsigned>, std::optional<unsigned>>>
+      Combinations;
+  for (auto AVL : AVLs) {
+    for (auto Log2SEW : Log2SEWs)
+      for (auto Policy : Policies) {
+        for (auto RM : RoundingModes)
+          Combinations.insert(std::make_tuple(RM, AVL, Log2SEW, Policy));
+      }
+  }
+
+  // The config filter does not depend on the combination, so compile the
+  // regex once up front rather than once per combination.
+  std::optional<Regex> ConfigFilter;
+  if (!FilterConfig.empty())
+    ConfigFilter.emplace(FilterConfig);
+
+  std::string ConfigStr;
+  SmallVector<std::pair<const Operand *, MCOperand>, 4> ValueAssignments;
+  for (const auto &[RM, AVL, Log2SEW, Policy] : Combinations) {
+    ListSeparator LS;
+    ConfigStr = "vtype = {";
+    raw_string_ostream SS(ConfigStr);
+
+    ValueAssignments.clear();
+
+    if (RM) {
+      const Operand &Op = Instr.Operands[RISCVII::getVLOpNum(MIDesc) - 1];
+      ValueAssignments.push_back({&Op, MCOperand::createImm(*RM)});
+      printRoundingMode(SS << LS << (UsesVXRM ? "VXRM" : "FRM") << ": ", *RM,
+                        UsesVXRM);
+    }
+
+    if (AVL) {
+      MCOperand OpVal;
+      if (*AVL < 0) {
+        // VLMAX
+        OpVal = MCOperand::createImm(-1);
+        SS << LS << "AVL: VLMAX";
+      } else if (*AVL == 0) {
+        // A register holding AVL.
+        // TODO: Generate a random register.
+        OpVal = MCOperand::createReg(RISCV::X5);
+        OpVal.print(SS << LS << "AVL: ");
+      } else {
+        // A 5-bit immediate.
+        // The actual value assignment is deferred to
+        // RISCVExegesisTarget::randomizeTargetMCOperand.
+        SS << LS << "AVL: simm5";
+      }
+      if (OpVal.isValid()) {
+        const Operand &Op = Instr.Operands[RISCVII::getVLOpNum(MIDesc)];
+        ValueAssignments.push_back({&Op, OpVal});
+      }
+    }
+
+    if (Log2SEW) {
+      const Operand &Op = Instr.Operands[RISCVII::getSEWOpNum(MIDesc)];
+      ValueAssignments.push_back({&Op, MCOperand::createImm(*Log2SEW)});
+      // A zero Log2SEW (mask-producing instructions) is printed as e8.
+      SS << LS << "SEW: e" << (*Log2SEW ? 1 << *Log2SEW : 8);
+    }
+
+    if (Policy) {
+      const Operand &Op = Instr.Operands[RISCVII::getVecPolicyOpNum(MIDesc)];
+      ValueAssignments.push_back({&Op, MCOperand::createImm(*Policy)});
+      SS << LS
+         << "Policy: " << (*Policy & RISCVVType::TAIL_AGNOSTIC ? "ta" : "tu")
+         << "/" << (*Policy & RISCVVType::MASK_AGNOSTIC ? "ma" : "mu");
+    }
+
+    SS << "}";
+
+    // Filter out some configurations, if needed.
+    if (ConfigFilter && !ConfigFilter->match(ConfigStr))
+      continue;
+
+    CodeTemplate CT = OrigCT.clone();
+    CT.Config = std::move(ConfigStr);
+    for (InstructionTemplate &IT : CT.Instructions) {
+      if (IsSerial) {
+        // Reset this template's value assignments and do it
+        // ourselves.
+        IT = InstructionTemplate(&Instr);
+        assignSerialRVVOperands(IT);
+      }
+
+      for (const auto &[Op, OpVal] : ValueAssignments)
+        IT.getValueFor(*Op) = OpVal;
+    }
+    Result.push_back(std::move(CT));
+    if (Result.size() - StartingResultSize >=
+        SnippetGenerator::Opts.MaxConfigsPerOpcode)
+      return;
+  }
+}
+
+/// Generate the code templates of \p Variant. Opcodes without an RVV MC
+/// opcode get the templates of \p BaseT unchanged; RVV pseudos get every base
+/// template expanded by annotateWithVType(). An empty list is returned for
+/// RVV pseudos that are not eligible for serial snippets.
+template <class BaseT>
+Expected<std::vector<CodeTemplate>>
+RISCVSnippetGenerator<BaseT>::generateCodeTemplates(
+    InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const {
+  const Instruction &Instr = Variant.getInstr();
+  const unsigned BaseOpcode = RISCV::getRVVMCOpcode(Instr.getOpcode());
+  const bool IsRVVPseudo = BaseOpcode != 0;
+  const bool IsSerial = std::is_same_v<BaseT, SerialSnippetGenerator>;
+
+  // Generating the base code templates is quite expensive, so reject
+  // ineligible opcodes before doing that.
+  if (IsSerial && IsRVVPseudo &&
+      isIneligibleOfSerialSnippets(BaseOpcode, Instr))
+    return std::vector<CodeTemplate>{};
+
+  Expected<std::vector<CodeTemplate>> BaseCodeTemplates =
+      BaseT::generateCodeTemplates(Variant, ForbiddenRegisters);
+  if (!BaseCodeTemplates)
+    return BaseCodeTemplates.takeError();
+
+  // Nothing to specialize for non-RVV opcodes.
+  if (!IsRVVPseudo)
+    return BaseCodeTemplates;
+
+  // Specialize for RVV pseudo.
+  std::vector<CodeTemplate> Expanded;
+  for (const CodeTemplate &BaseCT : *BaseCodeTemplates)
+    annotateWithVType(BaseCT, Instr, BaseOpcode, ForbiddenRegisters, Expanded);
+
+  return Expanded;
+}
+
// Stores constant value to a general-purpose (integer) register.
static std::vector<MCInst> loadIntReg(const MCSubtargetInfo &STI,
MCRegister Reg, const APInt &Value) {
@@ -75,30 +657,10 @@ static std::vector<MCInst> loadFP64RegBits32(const MCSubtargetInfo &STI,
return Instrs;
}
-static MCInst nop() {
- // ADDI X0, X0, 0
- return MCInstBuilder(RISCV::ADDI)
- .addReg(RISCV::X0)
- .addReg(RISCV::X0)
- .addImm(0);
-}
-
-static bool isVectorRegList(MCRegister Reg) {
- return RISCV::VRM2RegClass.contains(Reg) ||
- RISCV::VRM4RegClass.contains(Reg) ||
- RISCV::VRM8RegClass.contains(Reg) ||
- RISCV::VRN2M1RegClass.contains(Reg) ||
- RISCV::VRN2M2RegClass.contains(Reg) ||
- RISCV::VRN2M4RegClass.contains(Reg) ||
- RISCV::VRN3M1RegClass.contains(Reg) ||
- RISCV::VRN3M2RegClass.contains(Reg) ||
- RISCV::VRN4M1RegClass.contains(Reg) ||
- RISCV::VRN4M2RegClass.contains(Reg) ||
- RISCV::VRN5M1RegClass.contains(Reg) ||
- RISCV::VRN6M1RegClass.contains(Reg) ||
- RISCV::VRN7M1RegClass.contains(Reg) ||
- RISCV::VRN8M1RegClass.contains(Reg);
-}
+// MC opcodes of the RVV pseudos processed so far. NOTE: a BitVector would also
+// work here, but RVV MC opcodes are just a small portion of the entire opcode
+// space, so using a BitVector would be a waste of space.
+static SmallSet<unsigned, 16> RVVMCOpcodesWithPseudos;
class ExegesisRISCVTarget : public ExegesisTarget {
public:
@@ -109,6 +671,30 @@ class ExegesisRISCVTarget : public ExegesisTarget {
std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, MCRegister Reg,
const APInt &Value) const override;
+ // Returns the reason why Opcode must not be benchmarked, or nullptr when it
+ // is supported. RVV pseudos are always supported; the MC opcodes of the RVV
+ // pseudos seen so far are rejected; everything else is decided by the
+ // generic ExegesisTarget implementation.
+ const char *getIgnoredOpcodeReasonOrNull(const LLVMState &State,
+                                          unsigned Opcode) const override {
+   // We don't want to support RVV instructions that depend on VTYPE, because
+   // those instructions by themselves don't carry any additional information
+   // for us to set up the proper VTYPE environment via VSETVL instructions.
+   // FIXME: Ideally, we should use RISCVVInversePseudosTable, but it requires
+   // LMUL and SEW, and enumerating those combinations is not any better than
+   // the ugly trick here that memorizes the corresponding MC opcodes of the
+   // RVV pseudos we have processed previously. This works most of the time
+   // because RVV pseudo opcodes are placed before any other RVV opcodes. Of
+   // course this doesn't work if we're asked to benchmark only a certain
+   // subset of opcodes.
+   const bool IsMCOpcodeOfSeenPseudo =
+       RVVMCOpcodesWithPseudos.count(Opcode) != 0;
+   if (IsMCOpcodeOfSeenPseudo)
+     return "The MC opcode of RVV instructions are ignored";
+
+   const unsigned MCOpcode = RISCV::getRVVMCOpcode(Opcode);
+   if (MCOpcode == 0)
+     return ExegesisTarget::getIgnoredOpcodeReasonOrNull(State, Opcode);
+
+   // We want to support all RVV pseudos; remember the MC opcode so that it
+   // is ignored when it shows up later.
+   RVVMCOpcodesWithPseudos.insert(MCOpcode);
+   return nullptr;
+ }
+
MCRegister getDefaultLoopCounterRegister(const Triple &) const override;
void decrementLoopCounterAndJump(MachineBasicBlock &MBB,
@@ -131,9 +717,38 @@ class ExegesisRISCVTarget : public ExegesisTarget {
MCOperand &AssignedValue,
const BitVector &ForbiddenRegs) const override;
+ // Creates the RVV-aware wrapper around the generic serial generator.
+ std::unique_ptr<SnippetGenerator> createSerialSnippetGenerator(
+     const LLVMState &State,
+     const SnippetGenerator::Options &Opts) const override {
+   using GeneratorT = RISCVSnippetGenerator<SerialSnippetGenerator>;
+   return std::make_unique<GeneratorT>(State, Opts);
+ }
+
+ // Creates the RVV-aware wrapper around the generic parallel generator.
+ std::unique_ptr<SnippetGenerator> createParallelSnippetGenerator(
+     const LLVMState &State,
+     const SnippetGenerator::Options &Opts) const override {
+   using GeneratorT = RISCVSnippetGenerator<ParallelSnippetGenerator>;
+   return std::make_unique<GeneratorT>(State, Opts);
+ }
+
std::vector<InstructionTemplate>
generateInstructionVariants(const Instruction &Instr,
unsigned MaxConfigsPerOpcode) const override;
+
+ void addTargetSpecificPasses(PassManagerBase &PM) const override {
+ // Turn AVL operands held in physical registers into virtual registers.
+ PM.add(exegesis::createRISCVPreprocessingPass());
+ PM.add(createRISCVInsertVSETVLIPass());
+ // Set up the correct FRM.
+ PM.add(createRISCVInsertReadWriteCSRPass());
+ PM.add(createRISCVInsertWriteVXRMPass());
+ // This assigns physical registers to the results of VSETVLI instructions
+ // that produce VLMAX.
+ PM.add(exegesis::createRISCVPostprocessingPass());
+ // PseudoRET will be expanded by RISCVAsmPrinter; we have to expand
+ // PseudoMovImm with RISCVPostRAExpandPseudoPass though.
+ PM.add(createRISCVPostRAExpandPseudoPass());
+ }
};
ExegesisRISCVTarget::ExegesisRISCVTarget()
@@ -157,20 +772,7 @@ std::vector<MCInst> ExegesisRISCVTarget::setRegTo(const MCSubtargetInfo &STI,
return loadFPRegBits(STI, Reg, Value, RISCV::FMV_D_X);
return loadFP64RegBits32(STI, Reg, Value);
}
- if (Reg == RISCV::FRM || Reg == RISCV::VL || Reg == RISCV::VLENB ||
- Reg == RISCV::VTYPE || RISCV::GPRPairRegClass.contains(Reg) ||
- RISCV::VRRegClass.contains(Reg) || isVectorRegList(Reg)) {
- // Don't initialize:
- // - FRM
- // - VL, VLENB, VTYPE
- // - vector registers (and vector register lists)
- // - Zfinx registers
- // Generate 'NOP' so that exegesis treats such registers as initialized
- // (it tries to initialize them with '0' anyway).
- return {nop()};
- }
- errs() << "setRegTo is not implemented for Reg " << Reg
- << ", results will be unreliable\n";
+ // TODO: Emit proper code to initialize other kinds of registers.
return {};
}
@@ -243,6 +845,15 @@ Error ExegesisRISCVTarget::randomizeTargetMCOperand(
case RISCVOp::OPERAND_UIMMLOG2XLEN_NONZERO:
AssignedValue = MCOperand::createImm(1);
break;
+ case RISCVOp::OPERAND_SIMM5:
+ // 5-bit signed immediate value.
+ AssignedValue = MCOperand::createImm(randomIndex(31) - 16);
+ break;
+ case RISCVOp::OPERAND_AVL:
+ case RISCVOp::OPERAND_UIMM5:
+ // 5-bit unsigned immediate value.
+ AssignedValue = MCOperand::createImm(randomIndex(31));
+ break;
default:
if (OperandType >= RISCVOp::OPERAND_FIRST_RISCV_IMM &&
OperandType <= RISCVOp::OPERAND_LAST_RISCV_IMM)
diff --git a/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp
index 25cdf1ce66d44..f233ea4288a7e 100644
--- a/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp
+++ b/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp
@@ -53,6 +53,8 @@ computeAliasingInstructions(const LLVMState &State, const Instruction *Instr,
if (OtherOpcode == Instr->Description.getOpcode())
continue;
const Instruction &OtherInstr = State.getIC().getInstr(OtherOpcode);
+ if (ET.getIgnoredOpcodeReasonOrNull(State, OtherInstr.getOpcode()))
+ continue;
if (OtherInstr.hasMemoryOperands())
continue;
if (!ET.allowAsBackToBack(OtherInstr))
diff --git a/llvm/tools/llvm-exegesis/lib/Target.cpp b/llvm/tools/llvm-exegesis/lib/Target.cpp
index 5ea5b4c2c002f..68d19514bedb2 100644
--- a/llvm/tools/llvm-exegesis/lib/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Target.cpp
@@ -35,6 +35,19 @@ const ExegesisTarget *ExegesisTarget::lookup(Triple TT) {
return nullptr;
}
+// Default policy for opcodes that cannot be benchmarked: returns a static
+// reason string for pseudo/custom-inserter, branch and call/return opcodes,
+// and nullptr for everything else.
+const char *
+ExegesisTarget::getIgnoredOpcodeReasonOrNull(const LLVMState &State,
+                                             unsigned Opcode) const {
+  const MCInstrDesc &Desc = State.getIC().getInstr(Opcode).Description;
+
+  const bool NeedsExpansion =
+      Desc.isPseudo() || Desc.usesCustomInsertionHook();
+  if (NeedsExpansion)
+    return "Unsupported opcode: isPseudo/usesCustomInserter";
+
+  const bool IsAnyBranch = Desc.isBranch() || Desc.isIndirectBranch();
+  if (IsAnyBranch)
+    return "Unsupported opcode: isBranch/isIndirectBranch";
+
+  const bool IsCallOrReturn = Desc.isCall() || Desc.isReturn();
+  if (IsCallOrReturn)
+    return "Unsupported opcode: isCall/isReturn";
+
+  return nullptr;
+}
+
Expected<std::unique_ptr<pfm::CounterGroup>>
ExegesisTarget::createCounter(StringRef CounterName, const LLVMState &,
ArrayRef<const char *> ValidationCounters,
diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h
index f3fbe3780616f..77fbaa6e95412 100644
--- a/llvm/tools/llvm-exegesis/lib/Target.h
+++ b/llvm/tools/llvm-exegesis/lib/Target.h
@@ -154,6 +154,9 @@ class ExegesisTarget {
return IsOpcodeAvailable(Opcode, Features);
}
+ // Returns a reason string when `Opcode` should not be benchmarked, or
+ // nullptr when it is supported. Targets can override this to refine the
+ // default policy.
+ virtual const char *getIgnoredOpcodeReasonOrNull(const LLVMState &State,
+ unsigned Opcode) const;
+
// Sets the stack register to the auxiliary memory so that operations
// requiring the stack can be formed (e.g., setting large registers). The code
// generated by this function may clobber registers.
diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
index b9938a92855a4..babcffeb9666a 100644
--- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
+++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
@@ -300,18 +300,6 @@ T ExitOnFileError(const Twine &FileName, Expected<T> &&E) {
return std::move(*E);
}
-static const char *getIgnoredOpcodeReasonOrNull(const LLVMState &State,
- unsigned Opcode) {
- const MCInstrDesc &InstrDesc = State.getIC().getInstr(Opcode).Description;
- if (InstrDesc.isPseudo() || InstrDesc.usesCustomInsertionHook())
- return "Unsupported opcode: isPseudo/usesCustomInserter";
- if (InstrDesc.isBranch() || InstrDesc.isIndirectBranch())
- return "Unsupported opcode: isBranch/isIndirectBranch";
- if (InstrDesc.isCall() || InstrDesc.isReturn())
- return "Unsupported opcode: isCall/isReturn";
- return nullptr;
-}
-
// Checks that only one of OpcodeNames, OpcodeIndex or SnippetsFile is provided,
// and returns the opcode indices or {} if snippets should be read from
// `SnippetsFile`.
@@ -370,7 +358,8 @@ static Expected<std::vector<BenchmarkCode>>
generateSnippets(const LLVMState &State, unsigned Opcode,
const BitVector &ForbiddenRegs) {
// Ignore instructions that we cannot run.
- if (const char *Reason = getIgnoredOpcodeReasonOrNull(State, Opcode))
+ if (const char *Reason =
+ State.getExegesisTarget().getIgnoredOpcodeReasonOrNull(State, Opcode))
return make_error<Failure>(Reason);
const Instruction &Instr = State.getIC().getInstr(Opcode);
>From 0c4d94340c97988366d894721f6606fca689a5b3 Mon Sep 17 00:00:00 2001
From: Min Hsu <min.hsu at sifive.com>
Date: Tue, 25 Feb 2025 15:55:46 -0800
Subject: [PATCH 2/4] fixup! Change the test check prefix
---
.../tools/llvm-exegesis/RISCV/rvv/skip-rm.test | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/skip-rm.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/skip-rm.test
index a3af37149eeb5..b9a90b14bb26f 100644
--- a/llvm/test/tools/llvm-exegesis/RISCV/rvv/skip-rm.test
+++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/skip-rm.test
@@ -1,12 +1,12 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVAADDU_VV_M1 \
-# RUN: --riscv-enumerate-rounding-modes=false --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=VXRM
+# RUN: --riscv-enumerate-rounding-modes=false --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=VX
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFADD_VFPR16_M1_E16 \
-# RUN: --riscv-enumerate-rounding-modes=false --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=FRM
+# RUN: --riscv-enumerate-rounding-modes=false --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=FP
-# VXRM: PseudoVAADDU_VV_M1
-# VXRM: VXRM: rnu
-# VXRM-NOT: VXRM: {{(rne|rdn|rod)}}
+# VX: PseudoVAADDU_VV_M1
+# VX: VXRM: rnu
+# VX-NOT: VXRM: {{(rne|rdn|rod)}}
-# FRM: PseudoVFADD_VFPR16_M1_E16
-# FRM: FRM: rne
-# FRM-NOT: FRM: {{(rtz|rdn|rup|rmm|dyn)}}
+# FP: PseudoVFADD_VFPR16_M1_E16
+# FP: FRM: rne
+# FP-NOT: FRM: {{(rtz|rdn|rup|rmm|dyn)}}
>From bd1b6f856ad8101c59dda19f5f1b2ad04be63da8 Mon Sep 17 00:00:00 2001
From: Min Hsu <min.hsu at sifive.com>
Date: Wed, 26 Feb 2025 13:51:06 -0800
Subject: [PATCH 3/4] Address review comments
---
.../RISCV/rvv/eligible-inst.test | 57 +++++++++++++++++--
.../lib/RISCV/RISCVExegesisPostprocessing.cpp | 5 +-
llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp | 36 +++++-------
3 files changed, 69 insertions(+), 29 deletions(-)
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/rvv/eligible-inst.test b/llvm/test/tools/llvm-exegesis/RISCV/rvv/eligible-inst.test
index 189adf2c1b334..d5fdbc17192aa 100644
--- a/llvm/test/tools/llvm-exegesis/RISCV/rvv/eligible-inst.test
+++ b/llvm/test/tools/llvm-exegesis/RISCV/rvv/eligible-inst.test
@@ -1,10 +1,59 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
-# RUN: --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 | FileCheck %s --allow-empty --check-prefix=LATENCY
+# RUN: --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 | \
+# RUN: FileCheck %s --allow-empty --check-prefix=LATENCY
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
-# RUN: --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 --min-instructions=100 | FileCheck %s --check-prefix=RTHROUGHPUT
+# RUN: --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 \
+# RUN: --min-instructions=100 | \
+# RUN: FileCheck %s --check-prefix=RTHROUGHPUT1
+
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
+# RUN: --opcode-name=PseudoVRGATHEREI16_VV_M2_E32_M1,PseudoVRGATHER_VI_M2,PseudoVRGATHER_VV_M8_E32,PseudoVRGATHER_VX_M4 | \
+# RUN: FileCheck %s --allow-empty --check-prefix=LATENCY
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
+# RUN: --opcode-name=PseudoVRGATHEREI16_VV_M2_E32_M1,PseudoVRGATHER_VI_M2,PseudoVRGATHER_VV_M8_E32,PseudoVRGATHER_VX_M4 \
+# RUN: --min-instructions=100 | \
+# RUN: FileCheck %s --check-prefix=RTHROUGHPUT2
+
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
+# RUN: --opcode-name=PseudoVSLIDE1UP_VX_M1,PseudoVSLIDEUP_VI_M2,PseudoVSLIDEUP_VX_M2 | \
+# RUN: FileCheck %s --allow-empty --check-prefix=LATENCY
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
+# RUN: --opcode-name=PseudoVSLIDE1UP_VX_M1,PseudoVSLIDEUP_VI_M2,PseudoVSLIDEUP_VX_M2 \
+# RUN: --min-instructions=100 | \
+# RUN: FileCheck %s --check-prefix=RTHROUGHPUT3
+
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
+# RUN: --opcode-name=PseudoVNCLIPU_WI_M2,PseudoVNSRA_WI_M2,PseudoVNSRL_WI_M2 | \
+# RUN: FileCheck %s --allow-empty --check-prefix=LATENCY
+# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
+# RUN: --opcode-name=PseudoVNCLIPU_WI_M2,PseudoVNSRA_WI_M2,PseudoVNSRL_WI_M2 \
+# RUN: --min-instructions=100 | \
+# RUN: FileCheck %s --check-prefix=RTHROUGHPUT4
+
+# These instructions are only eligible under the inverse throughput mode.
# LATENCY-NOT: PseudoVCOMPRESS_VM_M2_E8
# LATENCY-NOT: PseudoVCPOP_M_B32
+# LATENCY-NOT: PseudoVRGATHEREI16_VV_M2_E32_M1
+# LATENCY-NOT: PseudoVRGATHER_VI_M2
+# LATENCY-NOT: PseudoVRGATHER_VV_M8_E32
+# LATENCY-NOT: PseudoVRGATHER_VX_M4
+# LATENCY-NOT: PseudoVSLIDE1UP_VX_M1
+# LATENCY-NOT: PseudoVSLIDEUP_VI_M2
+# LATENCY-NOT: PseudoVSLIDEUP_VX_M2
+# LATENCY-NOT: PseudoVNCLIPU_WI_M2
+# LATENCY-NOT: PseudoVNSRA_WI_M2
+# LATENCY-NOT: PseudoVNSRL_WI_M2
-# RTHROUGHPUT: PseudoVCOMPRESS_VM_M2_E8
-# RTHROUGHPUT: PseudoVCPOP_M_B32
+# RTHROUGHPUT1: PseudoVCOMPRESS_VM_M2_E8
+# RTHROUGHPUT1: PseudoVCPOP_M_B32
+# RTHROUGHPUT2: PseudoVRGATHEREI16_VV_M2_E32_M1
+# RTHROUGHPUT2: PseudoVRGATHER_VI_M2
+# RTHROUGHPUT2: PseudoVRGATHER_VV_M8_E32
+# RTHROUGHPUT2: PseudoVRGATHER_VX_M4
+# RTHROUGHPUT3: PseudoVSLIDE1UP_VX_M1
+# RTHROUGHPUT3: PseudoVSLIDEUP_VI_M2
+# RTHROUGHPUT3: PseudoVSLIDEUP_VX_M2
+# RTHROUGHPUT4: PseudoVNCLIPU_WI_M2
+# RTHROUGHPUT4: PseudoVNSRA_WI_M2
+# RTHROUGHPUT4: PseudoVNSRL_WI_M2
diff --git a/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp b/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp
index e25cf04a01d9e..fbb3d73a88c72 100644
--- a/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp
+++ b/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp
@@ -81,8 +81,9 @@ Register RISCVExegesisPostprocessing::allocateGPRRegister(
const MachineFunction &MF, const MachineRegisterInfo &MRI) {
const auto &TRI = *MRI.getTargetRegisterInfo();
- const TargetRegisterClass *GPRClass =
- TRI.getRegClass(RISCV::GPRJALRRegClassID);
+ // Avoid allocating callee-saved registers: GPRTC happens to cover
+ // nearly all caller-saved registers.
+ const TargetRegisterClass *GPRClass = TRI.getRegClass(RISCV::GPRTCRegClassID);
BitVector Candidates = TRI.getAllocatableSet(MF, GPRClass);
for (unsigned SetIdx : Candidates.set_bits()) {
diff --git a/llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp b/llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp
index c66ca6b86fb86..84448cf9c3f85 100644
--- a/llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp
@@ -149,17 +149,15 @@ template <class BaseT> class RISCVSnippetGenerator : public BaseT {
RISCV::GPRRegClassID, RISCV::FPR16RegClassID, RISCV::VRRegClassID};
for (unsigned RegClassID : StandaloneRegClasses)
- for (unsigned Reg : RegInfo.getRegClass(RegClassID)) {
+ for (unsigned Reg : RegInfo.getRegClass(RegClassID))
AggregateRegisters.reset(Reg);
- }
// Initialize ELEN and VLEN.
- // FIXME: We could have obtained these two from RISCVSubtarget
+ // FIXME: We could have obtained these two constants from RISCVSubtarget
// but in order to get that from TargetMachine, we need a Function.
- const Triple &TT = State.getTargetMachine().getTargetTriple();
- ELEN = TT.isRISCV32() ? 32 : 64;
-
const MCSubtargetInfo &STI = State.getSubtargetInfo();
+ ELEN = STI.checkFeatures("+zve64x") ? 64 : 32;
+
std::string ZvlQuery;
for (unsigned I = 5U, Size = (1 << I); I < 17U; ++I, Size <<= 1) {
ZvlQuery = "+zvl";
@@ -175,15 +173,15 @@ template <class BaseT> class RISCVSnippetGenerator : public BaseT {
const BitVector &ForbiddenRegisters) const override;
};
-static bool isMaskedSibiling(unsigned MaskedOp, unsigned UnmaskedOp) {
+static bool isMaskedSibling(unsigned MaskedOp, unsigned UnmaskedOp) {
const auto *RVVMasked = RISCV::getMaskedPseudoInfo(MaskedOp);
return RVVMasked && RVVMasked->UnmaskedPseudo == UnmaskedOp;
}
// There are primarily two kinds of opcodes that are not eligible
// in a serial snippet:
-// (1) Only has a single use operand that can not be overlap with
-// the def operand.
+// (1) Has a use operand that cannot overlap with the def operand
+// (i.e. early clobber).
// (2) The register file of the only use operand is different from
// that of the def operand. For instance, use operand is vector and
// the result is a scalar.
@@ -197,6 +195,8 @@ static bool isIneligibleOfSerialSnippets(unsigned BaseOpcode,
case RISCV::VCOMPRESS_VM:
case RISCV::VCPOP_M:
case RISCV::VCPOP_V:
+ // The permutation instructions listed below cannot have destination
+ // overlapping with the source.
case RISCV::VRGATHEREI16_VV:
case RISCV::VRGATHER_VI:
case RISCV::VRGATHER_VV:
@@ -204,16 +204,6 @@ static bool isIneligibleOfSerialSnippets(unsigned BaseOpcode,
case RISCV::VSLIDE1UP_VX:
case RISCV::VSLIDEUP_VI:
case RISCV::VSLIDEUP_VX:
- // The truncate instructions that arraive here are those who cannot
- // have any overlap between source and dest at all (i.e.
- // those whoe don't satisfy condition 2 and 3 in RVV spec
- // 5.2).
- case RISCV::VNCLIPU_WI:
- case RISCV::VNCLIPU_WV:
- case RISCV::VNCLIPU_WX:
- case RISCV::VNCLIP_WI:
- case RISCV::VNCLIP_WV:
- case RISCV::VNCLIP_WX:
return true;
default:
return false;
@@ -372,8 +362,8 @@ void RISCVSnippetGenerator<BaseT>::annotateWithVType(
const auto *RVVBase =
RISCVVInversePseudosTable::getBaseInfo(BaseOpcode, VLMul, SEW);
if (RVVBase && (RVVBase->Pseudo == VPseudoOpcode ||
- isMaskedSibiling(VPseudoOpcode, RVVBase->Pseudo) ||
- isMaskedSibiling(RVVBase->Pseudo, VPseudoOpcode))) {
+ isMaskedSibling(VPseudoOpcode, RVVBase->Pseudo) ||
+ isMaskedSibling(RVVBase->Pseudo, VPseudoOpcode))) {
// There is an integrated SEW, remove all but the SEW pushed last.
SEWCandidates.erase(SEWCandidates.begin(), SEWCandidates.end() - 1);
break;
@@ -395,7 +385,7 @@ void RISCVSnippetGenerator<BaseT>::annotateWithVType(
}
}
- // The EEW for source operand in VSEXT and VZEXT is a fractional
+ // The EEW for source operand in VSEXT and VZEXT is a fraction
// of the SEW, hence only SEWs that will lead to valid EEW are allowed.
if (auto Frac = isRVVSignZeroExtend(BaseOpcode))
if (*SEW / *Frac < MinSEW) {
@@ -411,7 +401,7 @@ void RISCVSnippetGenerator<BaseT>::annotateWithVType(
Feature_HasStdExtZvksedBit,
Feature_HasStdExtZvkshBit})) {
if (*SEW != 32)
- // Zvknhb support SEW=64 as well.
+ // Zvknhb supports SEW=64 as well.
if (*SEW != 64 || !STI.hasFeature(RISCV::FeatureStdExtZvknhb) ||
!isOpcodeAvailableIn(BaseOpcode,
{Feature_HasStdExtZvknhaOrZvknhbBit})) {
>From 32cb8fe11a043de10d9c7fc0495a5e040538f1a2 Mon Sep 17 00:00:00 2001
From: Min Hsu <min.hsu at sifive.com>
Date: Wed, 26 Feb 2025 14:03:48 -0800
Subject: [PATCH 4/4] fixup! Address review comments
---
.../llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp b/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp
index fbb3d73a88c72..c818f51806076 100644
--- a/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp
+++ b/llvm/tools/llvm-exegesis/lib/RISCV/RISCVExegesisPostprocessing.cpp
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
// \file
-// Currently there is only one post-processing we need to do for exegesis:
-// Assign a physical register to VSETVL's rd if it's not X0 (i.e. VLMAX).
+// This pass converts some of the virtual register operands in VSETVLI and FRM
+// pseudos into physical registers.
//
//===----------------------------------------------------------------------===//
More information about the llvm-commits
mailing list