[llvm] [LegalizeTypes][RISCV] Use SPLAT_VECTOR_PARTS to legalize splat BUILD_VECTOR (PR #107290)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 4 12:11:58 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-llvm-selectiondag
Author: Craig Topper (topperc)
Changes:
If the element type of a BUILD_VECTOR needs to be expanded, we can use SPLAT_VECTOR_PARTS if the target supports it.
There's already a DAGCombine that turns BUILD_VECTOR into SPLAT_VECTOR when the target makes SPLAT_VECTOR legal, but it doesn't fire for vectors that need to be split.
Alternatively, we could tweak the existing DAGCombine to form SPLAT_VECTOR if the type will be split down to a legal vector type that supports SPLAT_VECTOR. That requires iterating through getTypeToTransformTo until we reach a legal type.
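
For illustration only, a minimal sketch of the type-walking check that alternative would need, assuming access to the TargetLowering and LLVMContext; the helper name is hypothetical and is not part of this patch:

```cpp
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

// Follow the type-legalization chain for VT: as long as the legalizer would
// split the vector, step to the next (half-sized) type via
// getTypeToTransformTo. Once a legal type is reached, report whether
// SPLAT_VECTOR is legal on it.
static bool splatVectorLegalAfterSplitting(const TargetLowering &TLI,
                                           LLVMContext &Ctx, EVT VT) {
  while (TLI.getTypeAction(Ctx, VT) != TargetLowering::TypeLegal) {
    // Only keep walking while the action is a vector split; any other
    // action (widening, scalarization, ...) is out of scope for this sketch.
    if (TLI.getTypeAction(Ctx, VT) != TargetLowering::TypeSplitVector)
      return false;
    VT = TLI.getTypeToTransformTo(Ctx, VT);
  }
  return TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT);
}
```

The existing DAGCombine could then form SPLAT_VECTOR when a check like this returns true, instead of relying on the expansion-time hook added by this patch.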
---
Patch is 69.29 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/107290.diff
15 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp (+9)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll (+10-6)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll (+19-37)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll (+41-81)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll (+52-120)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll (+17-19)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll (+21-45)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll (+21-45)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll (+21-45)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll (+21-45)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll (+57-130)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll (+57-130)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll (+61-134)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll (+61-134)
- (modified) llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll (+32-29)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index b402e823762764..2655e8428309da 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -376,6 +376,15 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
assert(OldVT == VecVT.getVectorElementType() &&
"BUILD_VECTOR operand type doesn't match vector element type!");
+ if (VecVT.isInteger() && TLI.isOperationLegal(ISD::SPLAT_VECTOR, VecVT) &&
+ TLI.isOperationLegalOrCustom(ISD::SPLAT_VECTOR_PARTS, VecVT)) {
+ if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
+ SDValue Lo, Hi;
+ GetExpandedOp(V, Lo, Hi);
+ return DAG.getNode(ISD::SPLAT_VECTOR_PARTS, dl, VecVT, Lo, Hi);
+ }
+ }
+
// Build a vector of twice the length out of the expanded elements.
// For example <3 x i64> -> <6 x i32>.
SmallVector<SDValue, 16> NewElts;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
index 901be442c0012a..d52cbb54c4b2da 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
@@ -14,9 +14,11 @@ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrintf
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: sw a0, 0(sp)
+; RV32-NEXT: mv a0, sp
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v8, (a0), zero
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
@@ -669,9 +671,11 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrint
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: sw a0, 0(sp)
+; RV32-NEXT: mv a0, sp
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v8, (a0), zero
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
index ad075e4b4e198c..2f20caa6eb1894 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
@@ -397,43 +397,22 @@ define void @masked_load_v32i32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
declare <32 x i32> @llvm.masked.load.v32i32(ptr, i32, <32 x i1>, <32 x i32>)
define void @masked_load_v32i64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
-; RV32-LABEL: masked_load_v32i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi a3, a1, 128
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vle64.v v0, (a1)
-; RV32-NEXT: vle64.v v24, (a3)
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; RV32-NEXT: vmv.v.i v16, 0
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vmseq.vv v8, v0, v16
-; RV32-NEXT: vmseq.vv v0, v24, v16
-; RV32-NEXT: addi a1, a0, 128
-; RV32-NEXT: vle64.v v16, (a1), v0.t
-; RV32-NEXT: vmv1r.v v0, v8
-; RV32-NEXT: vle64.v v8, (a0), v0.t
-; RV32-NEXT: vse64.v v8, (a2)
-; RV32-NEXT: addi a0, a2, 128
-; RV32-NEXT: vse64.v v16, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: masked_load_v32i64:
-; RV64: # %bb.0:
-; RV64-NEXT: addi a3, a1, 128
-; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vle64.v v16, (a1)
-; RV64-NEXT: vle64.v v24, (a3)
-; RV64-NEXT: vmseq.vi v8, v16, 0
-; RV64-NEXT: vmseq.vi v0, v24, 0
-; RV64-NEXT: addi a1, a0, 128
-; RV64-NEXT: vle64.v v16, (a1), v0.t
-; RV64-NEXT: vmv1r.v v0, v8
-; RV64-NEXT: vle64.v v8, (a0), v0.t
-; RV64-NEXT: vse64.v v8, (a2)
-; RV64-NEXT: addi a0, a2, 128
-; RV64-NEXT: vse64.v v16, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: masked_load_v32i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a3, a1, 128
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vle64.v v16, (a1)
+; CHECK-NEXT: vle64.v v24, (a3)
+; CHECK-NEXT: vmseq.vi v8, v16, 0
+; CHECK-NEXT: vmseq.vi v0, v24, 0
+; CHECK-NEXT: addi a1, a0, 128
+; CHECK-NEXT: vle64.v v16, (a1), v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vle64.v v8, (a0), v0.t
+; CHECK-NEXT: vse64.v v8, (a2)
+; CHECK-NEXT: addi a0, a2, 128
+; CHECK-NEXT: vse64.v v16, (a0)
+; CHECK-NEXT: ret
%m = load <32 x i64>, ptr %m_ptr
%mask = icmp eq <32 x i64> %m, zeroinitializer
%load = call <32 x i64> @llvm.masked.load.v32i64(ptr %a, i32 8, <32 x i1> %mask, <32 x i64> undef)
@@ -547,3 +526,6 @@ define void @masked_load_v256i8(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
ret void
}
declare <256 x i8> @llvm.masked.load.v256i8(ptr, i32, <256 x i1>, <256 x i8>)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll
index 86c28247e97ef1..90690bbc8e2085 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll
@@ -397,87 +397,44 @@ define void @masked_store_v32i32(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
declare void @llvm.masked.store.v32i32.p0(<32 x i32>, ptr, i32, <32 x i1>)
define void @masked_store_v32i64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
-; RV32-LABEL: masked_store_v32i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 4
-; RV32-NEXT: sub sp, sp, a3
-; RV32-NEXT: addi a3, a2, 128
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vle64.v v24, (a2)
-; RV32-NEXT: vle64.v v8, (a3)
-; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: slli a2, a2, 3
-; RV32-NEXT: add a2, sp, a2
-; RV32-NEXT: addi a2, a2, 16
-; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV32-NEXT: vmv.v.i v8, 0
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vmseq.vv v7, v24, v8
-; RV32-NEXT: addi a2, a0, 128
-; RV32-NEXT: vle64.v v24, (a2)
-; RV32-NEXT: vle64.v v16, (a0)
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 3
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vmseq.vv v0, v16, v8
-; RV32-NEXT: addi a0, a1, 128
-; RV32-NEXT: vse64.v v24, (a0), v0.t
-; RV32-NEXT: vmv1r.v v0, v7
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vse64.v v8, (a1), v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 4
-; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: addi sp, sp, 16
-; RV32-NEXT: ret
-;
-; RV64-LABEL: masked_store_v32i64:
-; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: csrr a3, vlenb
-; RV64-NEXT: slli a3, a3, 4
-; RV64-NEXT: sub sp, sp, a3
-; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vle64.v v8, (a2)
-; RV64-NEXT: addi a2, a2, 128
-; RV64-NEXT: vle64.v v16, (a2)
-; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: slli a2, a2, 3
-; RV64-NEXT: add a2, sp, a2
-; RV64-NEXT: addi a2, a2, 16
-; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; RV64-NEXT: vmseq.vi v0, v8, 0
-; RV64-NEXT: vle64.v v24, (a0)
-; RV64-NEXT: addi a0, a0, 128
-; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: addi a0, sp, 16
-; RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 3
-; RV64-NEXT: add a0, sp, a0
-; RV64-NEXT: addi a0, a0, 16
-; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV64-NEXT: vmseq.vi v8, v16, 0
-; RV64-NEXT: vse64.v v24, (a1), v0.t
-; RV64-NEXT: addi a0, a1, 128
-; RV64-NEXT: vmv1r.v v0, v8
-; RV64-NEXT: addi a1, sp, 16
-; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vse64.v v8, (a0), v0.t
-; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 4
-; RV64-NEXT: add sp, sp, a0
-; RV64-NEXT: addi sp, sp, 16
-; RV64-NEXT: ret
+; CHECK-LABEL: masked_store_v32i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 4
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vle64.v v8, (a2)
+; CHECK-NEXT: addi a2, a2, 128
+; CHECK-NEXT: vle64.v v16, (a2)
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vmseq.vi v0, v8, 0
+; CHECK-NEXT: vle64.v v24, (a0)
+; CHECK-NEXT: addi a0, a0, 128
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmseq.vi v8, v16, 0
+; CHECK-NEXT: vse64.v v24, (a1), v0.t
+; CHECK-NEXT: addi a0, a1, 128
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vse64.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
%m = load <32 x i64>, ptr %m_ptr
%mask = icmp eq <32 x i64> %m, zeroinitializer
%val = load <32 x i64>, ptr %val_ptr
@@ -683,3 +640,6 @@ define void @masked_store_v256i8(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
ret void
}
declare void @llvm.masked.store.v256i8.p0(<256 x i8>, ptr, i32, <256 x i1>)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll
index 5601bd5ee7a3ae..805a3c640957bf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll
@@ -1346,93 +1346,48 @@ define <16 x i64> @vadd_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
declare <32 x i64> @llvm.vp.add.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32)
define <32 x i64> @vadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vadd_vx_v32i64:
-; RV32: # %bb.0:
-; RV32-NEXT: li a2, 16
-; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vi v7, v0, 2
-; RV32-NEXT: mv a1, a0
-; RV32-NEXT: bltu a0, a2, .LBB108_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: li a1, 16
-; RV32-NEXT: .LBB108_2:
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV32-NEXT: vmv.v.i v24, -1
-; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT: vadd.vv v8, v8, v24, v0.t
-; RV32-NEXT: addi a1, a0, -16
-; RV32-NEXT: sltu a0, a0, a1
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: and a0, a0, a1
-; RV32-NEXT: vmv1r.v v0, v7
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vadd.vv v16, v16, v24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vadd_vx_v32i64:
-; RV64: # %bb.0:
-; RV64-NEXT: li a2, 16
-; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vi v24, v0, 2
-; RV64-NEXT: mv a1, a0
-; RV64-NEXT: bltu a0, a2, .LBB108_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: li a1, 16
-; RV64-NEXT: .LBB108_2:
-; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT: vadd.vi v8, v8, -1, v0.t
-; RV64-NEXT: addi a1, a0, -16
-; RV64-NEXT: sltu a0, a0, a1
-; RV64-NEXT: addi a0, a0, -1
-; RV64-NEXT: and a0, a0, a1
-; RV64-NEXT: vmv1r.v v0, v24
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vadd.vi v16, v16, -1, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vadd_vx_v32i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vi v24, v0, 2
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: bltu a0, a2, .LBB108_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a1, 16
+; CHECK-NEXT: .LBB108_2:
+; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
+; CHECK-NEXT: ret
%v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl)
ret <32 x i64> %v
}
define <32 x i64> @vadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
-; RV32-LABEL: vadd_vi_v32i64_unmasked:
-; RV32: # %bb.0:
-; RV32-NEXT: li a2, 16
-; RV32-NEXT: mv a1, a0
-; RV32-NEXT: bltu a0, a2, .LBB109_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: li a1, 16
-; RV32-NEXT: .LBB109_2:
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV32-NEXT: vmv.v.i v24, -1
-; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT: vadd.vv v8, v8, v24
-; RV32-NEXT: addi a1, a0, -16
-; RV32-NEXT: sltu a0, a0, a1
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: and a0, a0, a1
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vadd.vv v16, v16, v24
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vadd_vi_v32i64_unmasked:
-; RV64: # %bb.0:
-; RV64-NEXT: li a2, 16
-; RV64-NEXT: mv a1, a0
-; RV64-NEXT: bltu a0, a2, .LBB109_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: li a1, 16
-; RV64-NEXT: .LBB109_2:
-; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT: vadd.vi v8, v8, -1
-; RV64-NEXT: addi a1, a0, -16
-; RV64-NEXT: sltu a0, a0, a1
-; RV64-NEXT: addi a0, a0, -1
-; RV64-NEXT: and a0, a0, a1
-; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vadd.vi v16, v16, -1
-; RV64-NEXT: ret
+; CHECK-LABEL: vadd_vi_v32i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: bltu a0, a2, .LBB109_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a1, 16
+; CHECK-NEXT: .LBB109_2:
+; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: addi a1, a0, -16
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vadd.vi v16, v16, -1
+; CHECK-NEXT: ret
%v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> splat (i1 true), i32 %evl)
ret <32 x i64> %v
}
@@ -1440,49 +1395,26 @@ define <32 x i64> @vadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
; FIXME: We don't match vadd.vi on RV32.
define <32 x i64> @vadd_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) {
-; RV32-LABEL: vadd_vx_v32i64_evl12:
-; RV32: # %bb.0:
-; RV32-NEXT: li a0, 32
-; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT: vmv.v.i v16, -1
-; RV32-NEXT: vsetivli zero, 12, e64, m8, ta, ma
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vadd_vx_v32i64_evl12:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 12, e64, m8, ta, ma
-; RV64-NEXT: vadd.vi v8, v8, -1, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vadd_vx_v32i64_evl12:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 12, e64, m8, ta, ma
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
%v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 12)
ret <32 x i64> %v
}
define <32 x i64> @vadd_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) {
-; RV32-LABEL: vadd_vx_v32i64_evl27:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vi v7, v0, 2
-; RV32-NEXT: li a0, 32
-; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT: vmv.v.i v24, -1
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vadd.vv v8, v8, v24, v0.t
-; RV32-NEXT: vmv1r.v v0, v7
-; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma
-; RV32-NEXT: vadd.vv v16, v16, v24, v0.t
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vadd_vx_v32i64_evl27:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vi v24, v0, 2
-; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vadd.vi v8, v8, -1, v0.t
-; RV64-NEXT: vmv1r.v v0, v24
-; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma
-; RV64-NEXT: vadd.vi v16, v16, -1, v0.t
-; RV64-NEXT: ret
+; CHECK-LABEL: vadd_vx_v32i64_evl27:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vi v24, v0, 2
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vsetivli zero, 11, e64, m8, ta, ma
+; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
+; CHECK-NEXT: ret
%v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 27)
ret <32 x i64> %v
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll
index d414be76672ab0..c413dd86f37128 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll
@@ -1139,18 +1139,16 @@ define <11 x i64> @vand_vv_v11i64_unmasked(<11 x i64> %va, <11 x i64> %b, i32 ze
define <11 x i64> @vand_vx_v11i64(<11 x i64> %va, i64 %b, <11 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vand_vx_v11i64:
; RV32: # %bb.0:
-; RV32-NEXT: vmv1r.v v16, v0
-; RV32-NEXT: lui a3, 341
-; RV32-NEXT: addi a3, a3, 1365
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v0, a3
-; RV32-NEXT: li a3, 32
-; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; RV32-NEXT: vmv.v.x v24, a1
-; RV32-NEXT: vmerge.vxm v24, v24, a0, v0
-; RV32-NEXT: vmv1r.v v0, v16
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a0), zero
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v8, v24, v0.t
+; RV32-NEXT: vand.vv v8, v8, v16, v0.t
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vand_vx_v11i64:
@@ -1167,16 +1165,16 @@ define <11 x i64> @vand_vx_v11i64(<11 x i64> %va, i64 %b, <11 x i1> %m, i32 zero
define <11 x i64> @vand_vx_v11i64_unmasked(<11 x i64> %va, i64 %b, i32 zeroext %evl) {
; RV32-LABEL: vand_vx_v11i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: li a3, 32
-; RV32-NEXT: lui a4, 341
-; RV32-NEXT: addi a4, a4, 1365
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v0, a4
-; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; RV32-NEXT: vmv.v.x v16, a1
-; RV32-NEXT: vmerge.vxm v16, v16, a0, v0
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 16,...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/107290