[llvm] [RISCV] Use vsetivli instead of `x0,x0` form to retain SEW/LMUL when AVL is imm (PR #169307)
Pengcheng Wang via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 25 22:22:33 PST 2025
================
@@ -47,6 +47,11 @@ static cl::opt<bool> EnsureWholeVectorRegisterMoveValidVTYPE(
"vill is cleared"),
cl::init(true));
+static cl::opt<bool> UseVsetivliForImmAVL(
+ DEBUG_TYPE "-use-vsetivli-for-imm-avl", cl::Hidden,
+ cl::desc("Use vsetivli to replace x0,x0 form when AVL is an immediate."),
+ cl::init(true));
----------------
wangpc-pp wrote:
I just wrote a test and there is a stable 0.1%-0.3% performance gain:
```c
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
// clang-format off
// Repetition helpers: each LOOPn pastes two copies of the previous level, so
// LOOP1024 expands to 1024 back-to-back copies of whatever LOOP is defined as
// at the point of use. LOOP itself is defined (and undefined again) locally
// by each benchmark kernel.
#define LOOP2 LOOP;LOOP
#define LOOP4 LOOP2;LOOP2
#define LOOP8 LOOP4;LOOP4
#define LOOP16 LOOP8;LOOP8
#define LOOP32 LOOP16;LOOP16
#define LOOP64 LOOP32;LOOP32
#define LOOP128 LOOP64;LOOP64
#define LOOP256 LOOP128;LOOP128
#define LOOP512 LOOP256;LOOP256
#define LOOP1024 LOOP512;LOOP512
// clang-format on
// Benchmark kernel for the `vsetvli zero, zero, ...` (x0,x0) form: after
// `vsetivli zero, 4` establishes the vector length, the two following vtype
// changes are done with `vsetvli zero, zero`. The inline /*vl=...*/ notes
// record the vector length expected after each configuration instruction.
// The function is declared naked and consists only of asm statements, so it
// returns through the explicit `ret` at the end.
__attribute__((naked)) void vsetvlix0x0() {
// One repetition: three vadd.vv operations, each preceded by a change of
// vector configuration (e32/m4, then e64/m2, then e32/m1).
#define LOOP \
asm("vsetivli zero, 16, e32, m4, ta, ma"); /*vl=16*/ \
asm("vadd.vv v0, v4, v0"); \
asm("vsetivli zero, 4, e32, m1, ta, ma"); /*vl=4*/ \
asm("vsetvli zero, zero, e64, m2, ta, ma"); /*vl=4*/ \
asm("vadd.vv v8, v10, v8"); \
asm("vsetvli zero, zero, e32, m1, ta, ma"); /*vl=4*/ \
asm("vadd.vv v12, v13, v12")
// Paste the sequence above 1024 times, straight-line with no loop overhead.
LOOP1024;
#undef LOOP
asm("ret");
}
// Benchmark kernel for the immediate-AVL form: every vector configuration
// change, including the two that keep vl at 4, is written as
// `vsetivli zero, <imm>, ...`. The inline /*vl=...*/ notes record the vector
// length expected after each configuration instruction.
// The function is declared naked and consists only of asm statements, so it
// returns through the explicit `ret` at the end.
__attribute__((naked)) void vsetivli() {
// One repetition: three vadd.vv operations, each preceded by a change of
// vector configuration (e32/m4, then e64/m2, then e32/m1).
#define LOOP \
asm("vsetivli zero, 16, e32, m4, ta, ma"); /*vl=16*/ \
asm("vadd.vv v0, v4, v0"); \
asm("vsetivli zero, 4, e32, m1, ta, ma"); /*vl=4*/ \
asm("vsetivli zero, 4, e64, m2, ta, ma"); /*vl=4*/ \
asm("vadd.vv v8, v10, v8"); \
asm("vsetivli zero, 4, e32, m1, ta, ma"); /*vl=4*/ \
asm("vadd.vv v12, v13, v12")
// Paste the sequence above 1024 times, straight-line with no loop overhead.
LOOP1024;
#undef LOOP
asm("ret");
}
// Calls `func` `times` times in a row and prints the elapsed processor time,
// in raw clock() ticks, as "Time of <name>: <ticks>".
//
// name  - label printed in the report line.
// times - number of calls to make; zero or a negative value makes no calls.
// func  - function to time; it is called with no arguments.
void bench(const char *name, int times, void (*func)()) {
  clock_t start = clock();
  for (int i = 0; i < times; i++) {
    func();
  }
  clock_t end = clock();
  // clock_t is an implementation-defined arithmetic type, so convert the
  // difference explicitly to match the %ld conversion specifier.
  printf("Time of %s: %ld\n", name, (long)(end - start));
}
// Entry point: times both kernels with the same iteration count.
// With no command-line argument the count is 1000000; otherwise the first
// argument is converted with atoi() and used as the count.
int main(int argc, char **argv) {
  int times = 1000000;
  if (argc != 1) {
    times = atoi(argv[1]);
  }

  bench("vsetvlix0x0", times, vsetvlix0x0);
  bench("vsetivli", times, vsetivli);
  return 0;
}
```
Please double-check this; I am not sure whether this test is representative.
https://github.com/llvm/llvm-project/pull/169307
More information about the llvm-commits
mailing list