[llvm] [RISCV][VLOPT] Add strided and unit strided loads to isSupported (PR #121705)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 5 08:51:18 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Michael Maitland (michaelmaitland)
Changes:
These loads are also added to getOperandInfo, since that is needed to reduce the VL.
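
For context, the new getOperandInfo entries all follow one pattern: the destination EEW is encoded in the load opcode and passed as log2(EEW) (3/4/5/6 for e8/e16/e32/e64), and the destination EMUL is derived as EMUL = (EEW/SEW)*LMUL. Below is a minimal standalone sketch of that mapping, not the actual LLVM API; `LoadEEW`, `log2EEW`, and `emulTimes8` are made-up helpers for illustration only.

```cpp
// Standalone sketch (not LLVM code) of the EEW/EMUL mapping the patch adds
// to getOperandInfo for unit-strided and strided loads.
#include <cassert>
#include <cstdio>

// Hypothetical stand-in for the EEW encoded in each load family.
enum class LoadEEW { E8 = 8, E16 = 16, E32 = 32, E64 = 64 };

// Returns log2(EEW), the value passed as the OperandInfo EEW argument.
static unsigned log2EEW(LoadEEW EEW) {
  switch (EEW) {
  case LoadEEW::E8:  return 3; // VLE8_V, VLM_V, VLSE8_V, VLE8FF_V
  case LoadEEW::E16: return 4; // VLE16_V, VLSE16_V, VLE16FF_V
  case LoadEEW::E32: return 5; // VLE32_V, VLSE32_V, VLE32FF_V
  case LoadEEW::E64: return 6; // VLE64_V, VLSE64_V, VLE64FF_V
  }
  return 0;
}

// EMUL = (EEW / SEW) * LMUL, expressed as a ratio scaled by 8 so that
// fractional LMULs (mf8..mf2) stay integral. Purely illustrative.
static unsigned emulTimes8(unsigned EEW, unsigned SEW, unsigned LMULTimes8) {
  assert(SEW != 0 && "SEW must be nonzero");
  return EEW * LMULTimes8 / SEW;
}

int main() {
  // Example: a vlse64.v (EEW=64) in a region with SEW=32, LMUL=1 has EMUL=2.
  std::printf("log2(EEW) for e64 loads: %u\n", log2EEW(LoadEEW::E64));
  std::printf("EMUL*8 for EEW=64, SEW=32, LMUL=1: %u\n",
              emulTimes8(64, 32, /*LMULTimes8=*/8));
  return 0;
}
```

Scaling LMUL by 8 keeps fractional LMULs exact without floating point, which is the same kind of ratio bookkeeping the real getEMULEqualsEEWDivSEWTimesLMUL helper performs.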
---
Patch is 140.18 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/121705.diff
29 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp (+30)
- (modified) llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll (+3-20)
- (modified) llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll (+95-119)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll (-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll (+1)
- (modified) llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll (+36-72)
- (modified) llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll (+8-16)
- (modified) llvm/test/CodeGen/RISCV/rvv/vand-vp.ll (+8-16)
- (modified) llvm/test/CodeGen/RISCV/rvv/vandn-vp.ll (+8-16)
- (modified) llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll (+8-16)
- (modified) llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll (+8-16)
- (modified) llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.mir (+30)
- (modified) llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll (+20-24)
- (modified) llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll (+8-16)
- (modified) llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll (+8-16)
- (modified) llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll (+8-16)
- (modified) llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll (+8-16)
- (modified) llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll (+8-16)
- (modified) llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll (+20-24)
- (modified) llvm/test/CodeGen/RISCV/rvv/vor-vp.ll (+8-16)
- (modified) llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll (+8-16)
- (modified) llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll (+8-16)
- (modified) llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll (+8-16)
- (modified) llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll (+8-16)
- (modified) llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll (+8-16)
- (modified) llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll (+8-16)
- (modified) llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll (+8-16)
- (modified) llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll (+8-16)
- (modified) llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll (+8-16)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 32d552625a8e8b..7ea0f797f85d73 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -257,17 +257,31 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
// Vector Unit-Stride Instructions
// Vector Strided Instructions
/// Dest EEW encoded in the instruction and EMUL=(EEW/SEW)*LMUL
+ case RISCV::VLE8_V:
case RISCV::VSE8_V:
+ case RISCV::VLM_V:
+ case RISCV::VSM_V:
+ case RISCV::VLSE8_V:
case RISCV::VSSE8_V:
+ case RISCV::VLE8FF_V:
return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(3, MI), 3);
+ case RISCV::VLE16_V:
case RISCV::VSE16_V:
+ case RISCV::VLSE16_V:
case RISCV::VSSE16_V:
+ case RISCV::VLE16FF_V:
return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(4, MI), 4);
+ case RISCV::VLE32_V:
case RISCV::VSE32_V:
+ case RISCV::VLSE32_V:
case RISCV::VSSE32_V:
+ case RISCV::VLE32FF_V:
return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(5, MI), 5);
+ case RISCV::VLE64_V:
case RISCV::VSE64_V:
+ case RISCV::VLSE64_V:
case RISCV::VSSE64_V:
+ case RISCV::VLE64FF_V:
return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(6, MI), 6);
// Vector Indexed Instructions
@@ -732,6 +746,22 @@ static bool isSupportedInstr(const MachineInstr &MI) {
return false;
switch (RVV->BaseInstr) {
+ // Vector Unit-Stride Instructions
+ // Vector Strided Instructions
+ case RISCV::VLE8_V:
+ case RISCV::VLM_V:
+ case RISCV::VLSE8_V:
+ case RISCV::VLE8FF_V:
+ case RISCV::VLE16_V:
+ case RISCV::VLSE16_V:
+ case RISCV::VLE16FF_V:
+ case RISCV::VLE32_V:
+ case RISCV::VLSE32_V:
+ case RISCV::VLE32FF_V:
+ case RISCV::VLE64_V:
+ case RISCV::VLSE64_V:
+ case RISCV::VLE64FF_V:
+
// Vector Single-Width Integer Add and Subtract
case RISCV::VADD_VI:
case RISCV::VADD_VV:
diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
index 10d24927d9b783..4d34621cd5f243 100644
--- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
@@ -1445,10 +1445,9 @@ define <vscale x 1 x i64> @vp_bitreverse_nxv1i64(<vscale x 1 x i64> %va, <vscale
; RV32-NEXT: addi a6, sp, 8
; RV32-NEXT: sw a4, 8(sp)
; RV32-NEXT: sw zero, 12(sp)
-; RV32-NEXT: vsetvli a4, zero, e64, m1, ta, ma
+; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a6), zero
; RV32-NEXT: lui a4, 61681
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsll.vx v10, v8, a3, v0.t
; RV32-NEXT: addi a5, a5, -256
; RV32-NEXT: vand.vx v11, v8, a5, v0.t
@@ -1595,9 +1594,7 @@ define <vscale x 1 x i64> @vp_bitreverse_nxv1i64_unmasked(<vscale x 1 x i64> %va
; RV32-NEXT: vand.vx v13, v8, a1
; RV32-NEXT: vand.vx v12, v12, a1
; RV32-NEXT: vor.vv v11, v12, v11
-; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v12, (a6), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsll.vx v13, v13, a4
; RV32-NEXT: vor.vv v10, v10, v13
; RV32-NEXT: vsrl.vi v13, v8, 8
@@ -1730,10 +1727,9 @@ define <vscale x 2 x i64> @vp_bitreverse_nxv2i64(<vscale x 2 x i64> %va, <vscale
; RV32-NEXT: addi a6, sp, 8
; RV32-NEXT: sw a4, 8(sp)
; RV32-NEXT: sw zero, 12(sp)
-; RV32-NEXT: vsetvli a4, zero, e64, m2, ta, ma
+; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a6), zero
; RV32-NEXT: lui a4, 61681
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsll.vx v12, v8, a3, v0.t
; RV32-NEXT: addi a5, a5, -256
; RV32-NEXT: vand.vx v14, v8, a5, v0.t
@@ -1880,9 +1876,7 @@ define <vscale x 2 x i64> @vp_bitreverse_nxv2i64_unmasked(<vscale x 2 x i64> %va
; RV32-NEXT: vand.vx v18, v8, a1
; RV32-NEXT: vand.vx v16, v16, a1
; RV32-NEXT: vor.vv v10, v16, v10
-; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v16, (a6), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsll.vx v18, v18, a4
; RV32-NEXT: vor.vv v12, v12, v18
; RV32-NEXT: vsrl.vi v18, v8, 8
@@ -2015,10 +2009,9 @@ define <vscale x 4 x i64> @vp_bitreverse_nxv4i64(<vscale x 4 x i64> %va, <vscale
; RV32-NEXT: addi a6, sp, 8
; RV32-NEXT: sw a4, 8(sp)
; RV32-NEXT: sw zero, 12(sp)
-; RV32-NEXT: vsetvli a4, zero, e64, m4, ta, ma
+; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a6), zero
; RV32-NEXT: lui a4, 61681
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a3, v0.t
; RV32-NEXT: addi a5, a5, -256
; RV32-NEXT: vand.vx v20, v8, a5, v0.t
@@ -2165,9 +2158,7 @@ define <vscale x 4 x i64> @vp_bitreverse_nxv4i64_unmasked(<vscale x 4 x i64> %va
; RV32-NEXT: vand.vx v28, v8, a1
; RV32-NEXT: vand.vx v24, v24, a1
; RV32-NEXT: vor.vv v12, v24, v12
-; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v24, (a6), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsll.vx v28, v28, a4
; RV32-NEXT: vor.vv v16, v16, v28
; RV32-NEXT: vsrl.vi v28, v8, 8
@@ -2315,7 +2306,6 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli a3, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a5), zero
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 3
@@ -2323,7 +2313,6 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: lui a3, 4080
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vx v24, v8, a3, v0.t
; RV32-NEXT: vsll.vi v24, v24, 24, v0.t
; RV32-NEXT: addi a5, sp, 16
@@ -2528,9 +2517,7 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64_unmasked(<vscale x 7 x i64> %va
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a6), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 24
; RV32-NEXT: vand.vx v16, v16, a5
; RV32-NEXT: vsrl.vi v0, v8, 8
@@ -2704,7 +2691,6 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli a3, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a5), zero
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 3
@@ -2712,7 +2698,6 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: lui a3, 4080
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vx v24, v8, a3, v0.t
; RV32-NEXT: vsll.vi v24, v24, 24, v0.t
; RV32-NEXT: addi a5, sp, 16
@@ -2917,9 +2902,7 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64_unmasked(<vscale x 8 x i64> %va
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a6), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 24
; RV32-NEXT: vand.vx v16, v16, a5
; RV32-NEXT: vsrl.vi v0, v8, 8
diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
index 0dc1d0c32ac449..0c58cca0f94726 100644
--- a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
@@ -523,11 +523,9 @@ define <vscale x 1 x i64> @vp_bswap_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsll.vx v9, v8, a2, v0.t
-; RV32-NEXT: addi a1, a3, -256
-; RV32-NEXT: vand.vx v10, v8, a1, v0.t
-; RV32-NEXT: vsetvli a3, zero, e64, m1, ta, ma
+; RV32-NEXT: addi a0, a3, -256
+; RV32-NEXT: vand.vx v10, v8, a0, v0.t
; RV32-NEXT: vlse64.v v11, (a6), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsll.vx v10, v10, a4, v0.t
; RV32-NEXT: vor.vv v9, v9, v10, v0.t
; RV32-NEXT: vand.vx v10, v8, a5, v0.t
@@ -538,7 +536,7 @@ define <vscale x 1 x i64> @vp_bswap_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1
; RV32-NEXT: vor.vv v9, v9, v10, v0.t
; RV32-NEXT: vsrl.vx v10, v8, a2, v0.t
; RV32-NEXT: vsrl.vx v12, v8, a4, v0.t
-; RV32-NEXT: vand.vx v12, v12, a1, v0.t
+; RV32-NEXT: vand.vx v12, v12, a0, v0.t
; RV32-NEXT: vor.vv v10, v12, v10, v0.t
; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t
; RV32-NEXT: vand.vx v12, v12, a5, v0.t
@@ -609,15 +607,13 @@ define <vscale x 1 x i64> @vp_bswap_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: vsll.vx v10, v8, a2
-; RV32-NEXT: addi a1, a3, -256
+; RV32-NEXT: addi a0, a3, -256
; RV32-NEXT: vsrl.vx v11, v8, a2
; RV32-NEXT: vsrl.vx v12, v8, a4
-; RV32-NEXT: vand.vx v13, v8, a1
-; RV32-NEXT: vand.vx v12, v12, a1
+; RV32-NEXT: vand.vx v13, v8, a0
+; RV32-NEXT: vand.vx v12, v12, a0
; RV32-NEXT: vor.vv v11, v12, v11
-; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v12, (a6), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsll.vx v13, v13, a4
; RV32-NEXT: vor.vv v10, v10, v13
; RV32-NEXT: vsrl.vi v13, v8, 8
@@ -695,11 +691,9 @@ define <vscale x 2 x i64> @vp_bswap_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsll.vx v10, v8, a2, v0.t
-; RV32-NEXT: addi a1, a3, -256
-; RV32-NEXT: vand.vx v12, v8, a1, v0.t
-; RV32-NEXT: vsetvli a3, zero, e64, m2, ta, ma
+; RV32-NEXT: addi a0, a3, -256
+; RV32-NEXT: vand.vx v12, v8, a0, v0.t
; RV32-NEXT: vlse64.v v14, (a6), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsll.vx v12, v12, a4, v0.t
; RV32-NEXT: vor.vv v10, v10, v12, v0.t
; RV32-NEXT: vand.vx v12, v8, a5, v0.t
@@ -710,7 +704,7 @@ define <vscale x 2 x i64> @vp_bswap_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2
; RV32-NEXT: vor.vv v10, v10, v12, v0.t
; RV32-NEXT: vsrl.vx v12, v8, a2, v0.t
; RV32-NEXT: vsrl.vx v16, v8, a4, v0.t
-; RV32-NEXT: vand.vx v16, v16, a1, v0.t
+; RV32-NEXT: vand.vx v16, v16, a0, v0.t
; RV32-NEXT: vor.vv v12, v16, v12, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t
; RV32-NEXT: vand.vx v16, v16, a5, v0.t
@@ -781,15 +775,13 @@ define <vscale x 2 x i64> @vp_bswap_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: vsll.vx v12, v8, a2
-; RV32-NEXT: addi a1, a3, -256
+; RV32-NEXT: addi a0, a3, -256
; RV32-NEXT: vsrl.vx v14, v8, a2
; RV32-NEXT: vsrl.vx v16, v8, a4
-; RV32-NEXT: vand.vx v18, v8, a1
-; RV32-NEXT: vand.vx v16, v16, a1
+; RV32-NEXT: vand.vx v18, v8, a0
+; RV32-NEXT: vand.vx v16, v16, a0
; RV32-NEXT: vor.vv v14, v16, v14
-; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v16, (a6), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsll.vx v18, v18, a4
; RV32-NEXT: vor.vv v12, v12, v18
; RV32-NEXT: vsrl.vi v18, v8, 8
@@ -867,11 +859,9 @@ define <vscale x 4 x i64> @vp_bswap_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a2, v0.t
-; RV32-NEXT: addi a1, a3, -256
-; RV32-NEXT: vand.vx v20, v8, a1, v0.t
-; RV32-NEXT: vsetvli a3, zero, e64, m4, ta, ma
+; RV32-NEXT: addi a0, a3, -256
+; RV32-NEXT: vand.vx v20, v8, a0, v0.t
; RV32-NEXT: vlse64.v v12, (a6), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsll.vx v20, v20, a4, v0.t
; RV32-NEXT: vor.vv v16, v16, v20, v0.t
; RV32-NEXT: vand.vx v20, v8, a5, v0.t
@@ -882,7 +872,7 @@ define <vscale x 4 x i64> @vp_bswap_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4
; RV32-NEXT: vor.vv v16, v16, v20, v0.t
; RV32-NEXT: vsrl.vx v20, v8, a2, v0.t
; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t
-; RV32-NEXT: vand.vx v24, v24, a1, v0.t
+; RV32-NEXT: vand.vx v24, v24, a0, v0.t
; RV32-NEXT: vor.vv v20, v24, v20, v0.t
; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t
; RV32-NEXT: vand.vx v24, v24, a5, v0.t
@@ -953,15 +943,13 @@ define <vscale x 4 x i64> @vp_bswap_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: vsll.vx v16, v8, a2
-; RV32-NEXT: addi a1, a3, -256
+; RV32-NEXT: addi a0, a3, -256
; RV32-NEXT: vsrl.vx v20, v8, a2
; RV32-NEXT: vsrl.vx v24, v8, a4
-; RV32-NEXT: vand.vx v28, v8, a1
-; RV32-NEXT: vand.vx v24, v24, a1
+; RV32-NEXT: vand.vx v28, v8, a0
+; RV32-NEXT: vand.vx v24, v24, a0
; RV32-NEXT: vor.vv v20, v24, v20
-; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v24, (a6), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsll.vx v28, v28, a4
; RV32-NEXT: vor.vv v16, v16, v28
; RV32-NEXT: vsrl.vi v28, v8, 8
@@ -1043,51 +1031,49 @@ define <vscale x 7 x i64> @vp_bswap_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a2, v0.t
-; RV32-NEXT: addi a1, a3, -256
-; RV32-NEXT: vand.vx v24, v8, a1, v0.t
+; RV32-NEXT: addi a0, a3, -256
+; RV32-NEXT: vand.vx v24, v8, a0, v0.t
; RV32-NEXT: vsll.vx v24, v24, a4, v0.t
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 4
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vlse64.v v16, (a5), zero
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: lui a1, 4080
+; RV32-NEXT: vand.vx v24, v8, a1, v0.t
+; RV32-NEXT: vsll.vi v24, v24, 24, v0.t
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vand.vv v24, v8, v16, v0.t
+; RV32-NEXT: vsll.vi v16, v24, 8, v0.t
+; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
+; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
-; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli a3, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a5), zero
+; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
+; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: lui a3, 4080
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vx v24, v8, a3, v0.t
-; RV32-NEXT: vsll.vi v24, v24, 24, v0.t
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vv v24, v8, v16, v0.t
-; RV32-NEXT: vsll.vi v16, v24, 8, v0.t
-; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vor.vv v16, v24, v16, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 4
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vor.vv v16, v24, v16, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 4
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t
; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t
-; RV32-NEXT: vand.vx v24, v24, a1, v0.t
+; RV32-NEXT: vand.vx v24, v24, a0, v0.t
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t
-; RV32-NEXT: vand.vx v24, v24, a3, v0.t
+; RV32-NEXT: vand.vx v24, v24, a1, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
@@ -1193,24 +1179,22 @@ define <vscale x 7 x i64> @vp_bswap_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vx v24, v8, a2
-; RV32-NEXT: addi a1, a3, -256
+; RV32-NEXT: addi a0, a3, -256
; RV32-NEXT: vsrl.vx v16, v8, a2
; RV32-NEXT: vsrl.vx v0, v8, a4
-; RV32-NEXT: vand.vx v0, v0, a1
+; RV32-NEXT: vand.vx v0, v0, a0
; RV32-NEXT: vor.vv v16, v0, v16
-; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: slli a2, a2, 3
-; RV32-NEXT: add a2, sp, a2
-; RV32-NEXT: addi a2, a2, 16
-; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vx v0, v8, a1
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vand.vx v0, v8, a0
; RV32-NEXT: vsll.vx v0, v0, a4
; RV32-NEXT: vor.vv v16, v24, v0
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT: vlse64.v v0, (a6), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 24
; RV32-NEXT: vand.vx v16, v16, a5
; RV32-NEXT: vsrl.vi v24, v8, 8
@@ -1221,7 +1205,6 @@ define <vscale x 7 x i64> @vp_bswap_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32
; RV32-NEXT: vsll.vi v8, v8, 24
; RV32-NEXT: vsll.vi v24, v24, 8
; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v8, v24, v8
; RV32-NEXT: csrr a0, vlenb
@@ -1318,51 +1301,49 @@ define <vscale x 8 x i64> @vp_bswap_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a2, v0.t
-; RV32-NEXT: addi a1, a3, -256
-; RV32-NEXT: vand.vx v24, v8, a1, v0.t
+; RV32-NEXT: addi a0, a3, -256
+; RV32-NEXT: vand.vx v24, v8, a0, v0.t
; RV32-NEXT: vsll.vx v24, v24, a4, v0.t
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 4
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vlse64.v v16, (a5), zero
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: lui a1, 4080
+; RV32-NEXT: vand.vx v24, v8, a1, v0.t
+; RV32-NEXT: vsll.vi v24, v24, 24, v0.t
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vand.vv v24, v8, v16, v0.t
+; RV32-NEXT: vsll.vi v16, v24, 8, v0.t
+; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
+; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
-; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli a3, zero, e64, m8, ta, ma
-; RV32-NEXT: ...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/121705