[llvm] [RISCV] Fix coalescing vsetvlis when AVL and vl registers are the same (PR #141941)
Min-Yih Hsu via llvm-commits
llvm-commits at lists.llvm.org
Thu May 29 10:29:07 PDT 2025
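The pattern at issue, visible in the CHECK lines of the test below, is a vsetvli that uses the same register for both its AVL input and its vl output:

  vsetvli a1, a1, e8, mf8, ta, ma

Because this instruction both reads and redefines a1, coalescing it with a neighboring vsetvli cannot assume a1 keeps its old value; that is presumably the hazard this test pins down.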
================
@@ -0,0 +1,63 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -verify-machineinstrs | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
+target triple = "riscv64-unknown-linux-gnu"
+
+define void @pr141907(ptr %0) #0 {
+; CHECK-LABEL: pr141907:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmclr.m v0
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: vsetvli a3, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: .LBB0_1: # %vector.body
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: vs4r.v v8, (a2)
+; CHECK-NEXT: vsetvli a1, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vsetivli zero, 0, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v11, v9, 0, v0.t
+; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma
+; CHECK-NEXT: vlseg3e32.v v8, (a2)
+; CHECK-NEXT: vsetivli zero, 0, e16, mf2, ta, ma
+; CHECK-NEXT: vsseg2e16.v v11, (zero)
+; CHECK-NEXT: bnez a1, .LBB0_1
+; CHECK-NEXT: .LBB0_2: # %while.body5
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v9, (a0)
+; CHECK-NEXT: j .LBB0_2
+entry:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %entry
+ %evl.based.iv = phi i64 [ 0, %entry ], [ %2, %vector.body ]
+ %vector.recur = phi <vscale x 2 x i32> [ zeroinitializer, %entry ], [ %3, %vector.body ]
+ %1 = call i32 @llvm.experimental.get.vector.length.i64(i64 %evl.based.iv, i32 1, i1 true)
+ %2 = zext i32 %1 to i64
+ %wide.masked.load = call <vscale x 6 x i32> @llvm.vp.load.nxv6i32.p0(ptr null, <vscale x 6 x i1> zeroinitializer, i32 0)
+ %deinterleaved.results = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave3.nxv6i32(<vscale x 6 x i32> %wide.masked.load)
+ %3 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 1
+ %vp.cast65 = call <vscale x 2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i32(<vscale x 2 x i32> %vector.recur, <vscale x 2 x i1> zeroinitializer, i32 0)
+ %interleaved.vec = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> %vp.cast65, <vscale x 2 x i16> zeroinitializer)
+ call void @llvm.vp.store.nxv4i16.p0(<vscale x 4 x i16> %interleaved.vec, ptr null, <vscale x 4 x i1> splat (i1 true), i32 0)
+ %4 = icmp eq i32 %1, 0
+ br i1 %4, label %while.body5, label %vector.body
+
+while.body5: ; preds = %while.body5, %vector.body
+ %5 = bitcast <vscale x 2 x i32> %3 to <vscale x 4 x i16>
+ %cond52 = extractelement <vscale x 4 x i16> %5, i64 0
+ store i16 %cond52, ptr %0, align 2
+ br label %while.body5
+}
+
+attributes #0 = { "target-cpu"="sifive-p670" "target-features"="+64bit,+a,+c,+d,+experimental,+f,+m,+relax,+unaligned-scalar-mem,+unaligned-vector-mem,+v,+xsifivecdiscarddlone,+xsifivecflushdlone,+za64rs,+zaamo,+zalrsc,+zba,+zbb,+zbs,+zca,+zcd,+zfhmin,+zic64b,+zicbom,+zicbop,+zicboz,+ziccamoa,+ziccif,+zicclsm,+ziccrse,+zicsr,+zifencei,+zihintntl,+zihintpause,+zihpm,+zmmul,+zvbb,+zvbc,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvkb,+zvkg,+zvkn,+zvknc,+zvkned,+zvkng,+zvknhb,+zvks,+zvksc,+zvksed,+zvksg,+zvksh,+zvkt,+zvl128b,+zvl32b,+zvl64b,-b,-e,-experimental-p,-experimental-smctr" }
----------------
mshockwave wrote:
I think we don't need most of these target features.
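For example, assuming the test only exercises the standard V extension (an untested assumption; target-cpu=sifive-p670 already implies the vector extension, so the long explicit list mostly restates the CPU's defaults), the attribute line could shrink to something like:

  ; Hypothetical trimmed attributes, not verified against the test:
  attributes #0 = { "target-cpu"="sifive-p670" "target-features"="+v" }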
https://github.com/llvm/llvm-project/pull/141941