[llvm] [RISCV] Disable combineToVCPOP for illegal scalable vector types. (PR #140195)
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 15 23:11:28 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Craig Topper (topperc)
Changes:
This transform creates target-specific instructions, which must have legal types. We were checking this for fixed vectors, but not for scalable vectors, so the combine crashed on <vscale x 1 x i1>, which isn't a legal type for Zve32x.
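For reference, a minimal LLVM IR reproducer, mirroring the test_nxv1i1 test added in this patch; with the Zve32x configuration from the new RUN line this previously hit the crash:

```llvm
; Previously crashed with: llc -mtriple=riscv64 -mattr=+zve32x,+zvl64b,+zbb
; <vscale x 1 x i1> is not a legal type for Zve32x, so combineToVCPOP must bail out.
define i32 @test_nxv1i1(<vscale x 1 x i1> %x) {
entry:
  %a = zext <vscale x 1 x i1> %x to <vscale x 1 x i32>
  %b = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %a)
  ret i32 %b
}
```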
---
Patch is 32.83 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140195.diff
2 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+4-3)
- (modified) llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll (+722)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c53550ea3b23b..045b346abf341 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -18798,6 +18798,10 @@ static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG,
if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
return SDValue();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isTypeLegal(SrcMVT))
+ return SDValue();
+
// Check that destination type is large enough to hold result without
// overflow.
if (Opc == ISD::VECREDUCE_ADD) {
@@ -18814,9 +18818,6 @@ static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG,
MVT ContainerVT = SrcMVT;
if (SrcMVT.isFixedLengthVector()) {
- if (!useRVVForFixedLengthVectorVT(SrcMVT, Subtarget))
- return SDValue();
-
ContainerVT = getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
index ac1d63311fd1e..582871e05801d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zbb | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zbb | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc < %s -mtriple=riscv64 -mattr=+zve32x,+zvl64b,+zbb | FileCheck %s --check-prefixes=ZVE
define i32 @test_v2i1(<2 x i1> %x) {
; CHECK-LABEL: test_v2i1:
@@ -8,6 +9,12 @@ define i32 @test_v2i1(<2 x i1> %x) {
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vcpop.m a0, v0
; CHECK-NEXT: ret
+;
+; ZVE-LABEL: test_v2i1:
+; ZVE: # %bb.0:
+; ZVE-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; ZVE-NEXT: vcpop.m a0, v0
+; ZVE-NEXT: ret
%a = zext <2 x i1> %x to <2 x i32>
%b = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a)
ret i32 %b
@@ -19,6 +26,12 @@ define i32 @test_v4i1(<4 x i1> %x) {
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vcpop.m a0, v0
; CHECK-NEXT: ret
+;
+; ZVE-LABEL: test_v4i1:
+; ZVE: # %bb.0:
+; ZVE-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; ZVE-NEXT: vcpop.m a0, v0
+; ZVE-NEXT: ret
%a = zext <4 x i1> %x to <4 x i32>
%b = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
ret i32 %b
@@ -30,6 +43,12 @@ define i32 @test_v8i1(<8 x i1> %x) {
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vcpop.m a0, v0
; CHECK-NEXT: ret
+;
+; ZVE-LABEL: test_v8i1:
+; ZVE: # %bb.0:
+; ZVE-NEXT: vsetivli zero, 8, e8, m1, ta, ma
+; ZVE-NEXT: vcpop.m a0, v0
+; ZVE-NEXT: ret
%a = zext <8 x i1> %x to <8 x i32>
%b = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a)
ret i32 %b
@@ -41,6 +60,12 @@ define i32 @test_v16i1(<16 x i1> %x) {
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vcpop.m a0, v0
; CHECK-NEXT: ret
+;
+; ZVE-LABEL: test_v16i1:
+; ZVE: # %bb.0:
+; ZVE-NEXT: vsetivli zero, 16, e8, m2, ta, ma
+; ZVE-NEXT: vcpop.m a0, v0
+; ZVE-NEXT: ret
%a = zext <16 x i1> %x to <16 x i32>
%b = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a)
ret i32 %b
@@ -53,6 +78,13 @@ define i32 @test_v32i1(<32 x i1> %x) {
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT: vcpop.m a0, v0
; CHECK-NEXT: ret
+;
+; ZVE-LABEL: test_v32i1:
+; ZVE: # %bb.0:
+; ZVE-NEXT: li a0, 32
+; ZVE-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; ZVE-NEXT: vcpop.m a0, v0
+; ZVE-NEXT: ret
%a = zext <32 x i1> %x to <32 x i32>
%b = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %a)
ret i32 %b
@@ -65,6 +97,13 @@ define i32 @test_v64i1(<64 x i1> %x) {
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT: vcpop.m a0, v0
; CHECK-NEXT: ret
+;
+; ZVE-LABEL: test_v64i1:
+; ZVE: # %bb.0:
+; ZVE-NEXT: li a0, 64
+; ZVE-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; ZVE-NEXT: vcpop.m a0, v0
+; ZVE-NEXT: ret
%a = zext <64 x i1> %x to <64 x i32>
%b = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> %a)
ret i32 %b
@@ -77,6 +116,93 @@ define i32 @test_v128i1(<128 x i1> %x) {
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT: vcpop.m a0, v0
; CHECK-NEXT: ret
+;
+; ZVE-LABEL: test_v128i1:
+; ZVE: # %bb.0:
+; ZVE-NEXT: addi sp, sp, -16
+; ZVE-NEXT: .cfi_def_cfa_offset 16
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 4
+; ZVE-NEXT: sub sp, sp, a0
+; ZVE-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; ZVE-NEXT: vsetivli zero, 4, e8, m1, ta, ma
+; ZVE-NEXT: vmv1r.v v7, v8
+; ZVE-NEXT: vmv1r.v v6, v0
+; ZVE-NEXT: vslidedown.vi v5, v8, 4
+; ZVE-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZVE-NEXT: vmv.v.i v16, 0
+; ZVE-NEXT: vsetivli zero, 2, e8, mf2, ta, ma
+; ZVE-NEXT: vslidedown.vi v0, v5, 2
+; ZVE-NEXT: vsetivli zero, 4, e8, m1, ta, ma
+; ZVE-NEXT: vslidedown.vi v4, v6, 4
+; ZVE-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vsetivli zero, 2, e8, mf2, ta, ma
+; ZVE-NEXT: vslidedown.vi v0, v4, 2
+; ZVE-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: addi a0, sp, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vsetivli zero, 2, e8, mf2, ta, ma
+; ZVE-NEXT: vslidedown.vi v0, v7, 2
+; ZVE-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: vsetivli zero, 2, e8, mf2, ta, ma
+; ZVE-NEXT: vslidedown.vi v0, v6, 2
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: addi a0, sp, 16
+; ZVE-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZVE-NEXT: vadd.vv v16, v16, v24
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vmv.v.i v16, 0
+; ZVE-NEXT: vmerge.vim v24, v16, 1, v0
+; ZVE-NEXT: vadd.vv v8, v24, v8
+; ZVE-NEXT: addi a0, sp, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vmv1r.v v0, v5
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: vmv1r.v v0, v4
+; ZVE-NEXT: vmerge.vim v24, v16, 1, v0
+; ZVE-NEXT: vadd.vv v8, v24, v8
+; ZVE-NEXT: vmv1r.v v0, v7
+; ZVE-NEXT: vmerge.vim v24, v16, 1, v0
+; ZVE-NEXT: vmv1r.v v0, v6
+; ZVE-NEXT: vmerge.vim v16, v16, 1, v0
+; ZVE-NEXT: vadd.vv v16, v16, v24
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: addi a0, sp, 16
+; ZVE-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: vadd.vv v24, v0, v24
+; ZVE-NEXT: vadd.vv v8, v16, v8
+; ZVE-NEXT: vadd.vv v8, v8, v24
+; ZVE-NEXT: vmv.s.x v16, zero
+; ZVE-NEXT: vredsum.vs v8, v8, v16
+; ZVE-NEXT: vmv.x.s a0, v8
+; ZVE-NEXT: csrr a1, vlenb
+; ZVE-NEXT: slli a1, a1, 4
+; ZVE-NEXT: add sp, sp, a1
+; ZVE-NEXT: .cfi_def_cfa sp, 16
+; ZVE-NEXT: addi sp, sp, 16
+; ZVE-NEXT: .cfi_def_cfa_offset 0
+; ZVE-NEXT: ret
%a = zext <128 x i1> %x to <128 x i32>
%b = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> %a)
ret i32 %b
@@ -168,17 +294,331 @@ define i32 @test_v256i1(<256 x i1> %x) {
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: ret
+;
+; ZVE-LABEL: test_v256i1:
+; ZVE: # %bb.0:
+; ZVE-NEXT: addi sp, sp, -16
+; ZVE-NEXT: .cfi_def_cfa_offset 16
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: mv a1, a0
+; ZVE-NEXT: slli a0, a0, 1
+; ZVE-NEXT: add a1, a1, a0
+; ZVE-NEXT: slli a0, a0, 1
+; ZVE-NEXT: add a0, a0, a1
+; ZVE-NEXT: sub sp, sp, a0
+; ZVE-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb
+; ZVE-NEXT: vsetivli zero, 4, e8, m1, ta, ma
+; ZVE-NEXT: vmv1r.v v6, v10
+; ZVE-NEXT: vmv1r.v v4, v9
+; ZVE-NEXT: vmv1r.v v7, v8
+; ZVE-NEXT: vmv1r.v v5, v0
+; ZVE-NEXT: vslidedown.vi v3, v9, 4
+; ZVE-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZVE-NEXT: vmv.v.i v16, 0
+; ZVE-NEXT: vsetivli zero, 2, e8, mf2, ta, ma
+; ZVE-NEXT: vslidedown.vi v0, v3, 2
+; ZVE-NEXT: vsetivli zero, 4, e8, m1, ta, ma
+; ZVE-NEXT: vslidedown.vi v2, v5, 4
+; ZVE-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: mv a1, a0
+; ZVE-NEXT: slli a0, a0, 2
+; ZVE-NEXT: add a0, a0, a1
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vsetivli zero, 2, e8, mf2, ta, ma
+; ZVE-NEXT: vslidedown.vi v0, v2, 2
+; ZVE-NEXT: vsetivli zero, 4, e8, m1, ta, ma
+; ZVE-NEXT: vslidedown.vi v1, v6, 4
+; ZVE-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 5
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vsetivli zero, 2, e8, mf2, ta, ma
+; ZVE-NEXT: vslidedown.vi v0, v1, 2
+; ZVE-NEXT: vsetivli zero, 4, e8, m1, ta, ma
+; ZVE-NEXT: vslidedown.vi v24, v7, 4
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 4
+; ZVE-NEXT: mv a1, a0
+; ZVE-NEXT: slli a0, a0, 1
+; ZVE-NEXT: add a0, a0, a1
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
+; ZVE-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: mv a1, a0
+; ZVE-NEXT: slli a0, a0, 1
+; ZVE-NEXT: add a0, a0, a1
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vsetivli zero, 2, e8, mf2, ta, ma
+; ZVE-NEXT: vslidedown.vi v0, v24, 2
+; ZVE-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vsetivli zero, 2, e8, mf2, ta, ma
+; ZVE-NEXT: vslidedown.vi v0, v4, 2
+; ZVE-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 4
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vsetivli zero, 2, e8, mf2, ta, ma
+; ZVE-NEXT: vslidedown.vi v0, v5, 2
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: mv a1, a0
+; ZVE-NEXT: slli a0, a0, 2
+; ZVE-NEXT: add a0, a0, a1
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 5
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZVE-NEXT: vadd.vv v8, v24, v8
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: mv a1, a0
+; ZVE-NEXT: slli a0, a0, 2
+; ZVE-NEXT: add a0, a0, a1
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: addi a0, sp, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vsetivli zero, 2, e8, mf2, ta, ma
+; ZVE-NEXT: vslidedown.vi v0, v6, 2
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: mv a1, a0
+; ZVE-NEXT: slli a0, a0, 1
+; ZVE-NEXT: add a0, a0, a1
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZVE-NEXT: vadd.vv v8, v8, v24
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 5
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vsetivli zero, 2, e8, mf2, ta, ma
+; ZVE-NEXT: vslidedown.vi v0, v7, 2
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 4
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: addi a0, sp, 16
+; ZVE-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZVE-NEXT: vadd.vv v8, v24, v8
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: mv a1, a0
+; ZVE-NEXT: slli a0, a0, 1
+; ZVE-NEXT: add a0, a0, a1
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: vadd.vv v8, v8, v24
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 4
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vmv1r.v v0, v3
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: vmv1r.v v0, v2
+; ZVE-NEXT: vmerge.vim v24, v16, 1, v0
+; ZVE-NEXT: vadd.vv v8, v24, v8
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vmv1r.v v0, v1
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 4
+; ZVE-NEXT: mv a1, a0
+; ZVE-NEXT: slli a0, a0, 1
+; ZVE-NEXT: add a0, a0, a1
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
+; ZVE-NEXT: vmerge.vim v24, v16, 1, v0
+; ZVE-NEXT: vadd.vv v8, v24, v8
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 4
+; ZVE-NEXT: mv a1, a0
+; ZVE-NEXT: slli a0, a0, 1
+; ZVE-NEXT: add a0, a0, a1
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vmv1r.v v0, v4
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: vmv1r.v v0, v5
+; ZVE-NEXT: vmerge.vim v24, v16, 1, v0
+; ZVE-NEXT: vadd.vv v8, v24, v8
+; ZVE-NEXT: addi a0, sp, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vmv1r.v v0, v6
+; ZVE-NEXT: vmerge.vim v24, v16, 1, v0
+; ZVE-NEXT: vmv1r.v v0, v7
+; ZVE-NEXT: vmerge.vim v16, v16, 1, v0
+; ZVE-NEXT: vadd.vv v16, v16, v24
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: mv a1, a0
+; ZVE-NEXT: slli a0, a0, 2
+; ZVE-NEXT: add a0, a0, a1
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 5
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: vadd.vv v24, v24, v0
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: mv a1, a0
+; ZVE-NEXT: slli a0, a0, 1
+; ZVE-NEXT: add a0, a0, a1
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 4
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: vadd.vv v0, v0, v8
+; ZVE-NEXT: vadd.vv v24, v0, v24
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 4
+; ZVE-NEXT: mv a1, a0
+; ZVE-NEXT: slli a0, a0, 1
+; ZVE-NEXT: add a0, a0, a1
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: vadd.vv v0, v8, v0
+; ZVE-NEXT: addi a0, sp, 16
+; ZVE-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: vadd.vv v8, v8, v16
+; ZVE-NEXT: vadd.vv v8, v8, v0
+; ZVE-NEXT: vadd.vv v8, v8, v24
+; ZVE-NEXT: vmv.s.x v16, zero
+; ZVE-NEXT: vredsum.vs v8, v8, v16
+; ZVE-NEXT: vmv.x.s a0, v8
+; ZVE-NEXT: csrr a1, vlenb
+; ZVE-NEXT: slli a1, a1, 3
+; ZVE-NEXT: mv a2, a1
+; ZVE-NEXT: slli a1, a1, 1
+; ZVE-NEXT: add a2, a2, a1
+; ZVE-NEXT: slli a1, a1, 1
+; ZVE-NEXT: add a1, a1, a2
+; ZVE-NEXT: add sp, sp, a1
+; ZVE-NEXT: .cfi_def_cfa sp, 16
+; ZVE-NEXT: addi sp, sp, 16
+; ZVE-NEXT: .cfi_def_cfa_offset 0
+; ZVE-NEXT: ret
%a = zext <256 x i1> %x to <256 x i32>
%b = call i32 @llvm.vector.reduce.add.v256i32(<256 x i32> %a)
ret i32 %b
}
+; FIXME: Optimize this case with Zve32x. We have to use mf4 and set the VL to
+; VLEN/64.
+define i32 @test_nxv1i1(<vscale x 1 x i1> %x) {
+; CHECK-LABEL: test_nxv1i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vcpop.m a0, v0
+; CHECK-NEXT: ret
+;
+; ZVE-LABEL: test_nxv1i1:
+; ZVE: # %bb.0: # %entry
+; ZVE-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; ZVE-NEXT: vmv.v.i v8, 0
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: srli a0, a0, 3
+; ZVE-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; ZVE-NEXT: vmerge.vim v8, v8, 1, v0
+; ZVE-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; ZVE-NEXT: vmv.s.x v9, zero
+; ZVE-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; ZVE-NEXT: vredsum.vs v9, v8, v9
+; ZVE-NEXT: vmv.x.s a0, v9
+; ZVE-NEXT: ret
+entry:
+ %a = zext <vscale x 1 x i1> %x to <vscale x 1 x i32>
+ %b = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %a)
+ ret i32 %b
+}
+
define i32 @test_nxv2i1(<vscale x 2 x i1> %x) {
; CHECK-LABEL: test_nxv2i1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT: vcpop.m a0, v0
; CHECK-NEXT: ret
+;
+; ZVE-LABEL: test_nxv2i1:
+; ZVE: # %bb.0: # %entry
+; ZVE-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; ZVE-NEXT: vcpop.m a0, v0
+; ZVE-NEXT: ret
entry:
%a = zext <vscale x 2 x i1> %x to <vscale x 2 x i32>
%b = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x ...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/140195