[llvm] [RISCV] Don't combine store of vmv.x.s/vfmv.f.s to vp_store with VL of 1 when it's an indexed store (PR #73219)

Zi Xuan Wu via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 23 00:46:30 PST 2023


https://github.com/zixuan-wu created https://github.com/llvm/llvm-project/pull/73219

The combine is skipped for indexed stores because a vp_store with an indexed addressing mode cannot be lowered to the vse intrinsic later.
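
For context, the combine in RISCVTargetLowering::PerformDAGCombine turns a scalar store of an element extracted from a scalable vector (the vmv.x.s/vfmv.f.s pattern) into a vp_store with a VL of 1. The fix simply guards that rewrite with StoreSDNode::isIndexed(); the condition below restates the hunk from the diff, with explanatory comments added:

      // Only fold the extracted-element store into a VL=1 vp_store when the
      // store uses plain (unindexed) addressing; an indexed vp_store cannot
      // be lowered to the vse intrinsic later.
      if (!Store->isIndexed() && VecVT.isScalableVector() &&
          MemVT == VecVT.getVectorElementType()) {
        // ... build the VL=1 VP_STORE as before ...
      }

Pre/post-incremented stores (such as those formed for XTHeadMemIdx, as in the test below) therefore keep the regular store path.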

From b9ac11278af942161d0b9425566116eadef429f9 Mon Sep 17 00:00:00 2001
From: "Zi Xuan Wu (Zeson)" <zixuan.wu at linux.alibaba.com>
Date: Thu, 23 Nov 2023 15:39:11 +0800
Subject: [PATCH] [RISCV] Don't combine store of vmv.x.s/vfmv.f.s to vp_store
 with VL of 1 when it's an indexed store

The combine is skipped for indexed stores because a vp_store with an
indexed addressing mode cannot be lowered to the vse intrinsic later.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  2 +-
 .../RISCV/rvv/combine-store-extract-crash.ll  | 85 +++++++++++++++++++
 2 files changed, 86 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 97f9fb2223f1ae4..84f698baa2b47e9 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -15264,7 +15264,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
       SDValue Src = Val.getOperand(0);
       MVT VecVT = Src.getSimpleValueType();
       // VecVT should be scalable and memory VT should match the element type.
-      if (VecVT.isScalableVector() &&
+      if (!Store->isIndexed() && VecVT.isScalableVector() &&
           MemVT == VecVT.getVectorElementType()) {
         SDLoc DL(N);
         MVT MaskVT = getMaskTypeFor(VecVT);
diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll b/llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll
new file mode 100644
index 000000000000000..7916822a7056789
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=riscv32 -mattr=+v,+xtheadmemidx -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefix RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v,+xtheadmemidx -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefix RV64
+
+define void @test(ptr %ref_array, ptr %sad_array) #0 {
+; RV32-LABEL: test:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    th.lwd a2, a3, (a0), 0, 3
+; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; RV32-NEXT:    vle8.v v8, (a2)
+; RV32-NEXT:    vmv.v.i v9, 0
+; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT:    vzext.vf4 v12, v8
+; RV32-NEXT:    vmv.s.x v8, zero
+; RV32-NEXT:    vredsum.vs v10, v12, v8
+; RV32-NEXT:    vmv.x.s a0, v10
+; RV32-NEXT:    th.swia a0, (a1), 4, 0
+; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; RV32-NEXT:    vle8.v v10, (a3)
+; RV32-NEXT:    vsetivli zero, 8, e8, m1, tu, ma
+; RV32-NEXT:    vslideup.vi v10, v9, 4
+; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT:    vzext.vf4 v12, v10
+; RV32-NEXT:    vredsum.vs v8, v12, v8
+; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT:    vse32.v v8, (a1)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    th.ldd a2, a3, (a0), 0, 4
+; RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; RV64-NEXT:    vle8.v v8, (a2)
+; RV64-NEXT:    vmv.v.i v9, 0
+; RV64-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; RV64-NEXT:    vzext.vf4 v12, v8
+; RV64-NEXT:    vmv.s.x v8, zero
+; RV64-NEXT:    vredsum.vs v10, v12, v8
+; RV64-NEXT:    vmv.x.s a0, v10
+; RV64-NEXT:    th.swia a0, (a1), 4, 0
+; RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; RV64-NEXT:    vle8.v v10, (a3)
+; RV64-NEXT:    vsetivli zero, 8, e8, m1, tu, ma
+; RV64-NEXT:    vslideup.vi v10, v9, 4
+; RV64-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; RV64-NEXT:    vzext.vf4 v12, v10
+; RV64-NEXT:    vredsum.vs v8, v12, v8
+; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT:    vse32.v v8, (a1)
+; RV64-NEXT:    ret
+entry:
+  %0 = load ptr, ptr %ref_array, align 8
+  %1 = load <4 x i8>, ptr %0, align 1
+  %2 = shufflevector <4 x i8> %1, <4 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %3 = zext <16 x i8> %2 to <16 x i32>
+  %4 = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %3)
+  store i32 %4, ptr %sad_array, align 4, !tbaa !0
+  %arrayidx.1 = getelementptr ptr, ptr %ref_array, i64 1
+  %5 = load ptr, ptr %arrayidx.1, align 8, !tbaa !4
+  %6 = load <4 x i8>, ptr %5, align 1
+  %7 = shufflevector <4 x i8> %6, <4 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %8 = zext <16 x i8> %7 to <16 x i32>
+  %9 = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %8)
+  %arrayidx2.1 = getelementptr i32, ptr %sad_array, i64 1
+  store i32 %9, ptr %arrayidx2.1, align 4
+  ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) #1
+
+; uselistorder directives
+uselistorder ptr @llvm.vector.reduce.add.v16i32, { 1, 0 }
+
+attributes #0 = { "target-features"="+a,+c,+d,+f,+m,+relax,+v,+xtheadba,+xtheadbb,+xtheadbs,+xtheadcmo,+xtheadcondmov,+xtheadfmemidx,+xtheadmac,+xtheadmemidx,+xtheadmempair,+xtheadsync,+xtheadvdot,+zba,+zbb,+zbc,+zbs,+zfh,+zicsr,+zihintpause,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"int", !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C/C++ TBAA"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"any pointer", !2, i64 0}


