[llvm] [RISCV] Don't unroll vectorized loops with vector operands (PR #171089)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 8 00:22:15 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
Author: Pengcheng Wang (wangpc-pp)
<details>
<summary>Changes</summary>
We disabled unrolling for vectorized loops in #151525, but that PR only checked the
result type of each instruction.
Some loops contain no instruction with a vector result type yet still perform
vector operations (like the memset-zero case in the precommit test).
Check the operand types as well to cover these cases.
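
For illustration, this is the shape of IR the old check misses (modeled on the added `@vector_operands` test below): the vp.store has a void result type, so looking only at `I.getType()` never sees a vector, but its stored-value operand is one.

```llvm
; Result type is void, so I.getType()->isVectorTy() is false,
; but the stored value is <vscale x 2 x i64>, so the new operand check fires.
call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer,
                                    ptr align 8 %addr,
                                    <vscale x 2 x i1> splat (i1 true),
                                    i32 %vl)
```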
---
Full diff: https://github.com/llvm/llvm-project/pull/171089.diff
2 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp (+4-1)
- (modified) llvm/test/Transforms/LoopUnroll/RISCV/vector.ll (+129-7)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index afc2f2c9cd07b..79cd651febf85 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2802,7 +2802,10 @@ void RISCVTTIImpl::getUnrollingPreferences(
// Both auto-vectorized loops and the scalar remainder have the
// isvectorized attribute, so differentiate between them by the presence
// of vector instructions.
- if (IsVectorized && I.getType()->isVectorTy())
+ if (IsVectorized && (I.getType()->isVectorTy() ||
+ llvm::any_of(I.operand_values(), [](Value *V) {
+ return V->getType()->isVectorTy();
+ })))
return;
if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
diff --git a/llvm/test/Transforms/LoopUnroll/RISCV/vector.ll b/llvm/test/Transforms/LoopUnroll/RISCV/vector.ll
index b575057ff6d15..b441f42f267af 100644
--- a/llvm/test/Transforms/LoopUnroll/RISCV/vector.ll
+++ b/llvm/test/Transforms/LoopUnroll/RISCV/vector.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -p loop-unroll -mtriple riscv64 -mattr=+v,+f -S %s | FileCheck %s --check-prefixes=COMMON,CHECK
-; RUN: opt -p loop-unroll -mtriple=riscv64 -mcpu=sifive-s76 -S %s | FileCheck %s --check-prefixes=COMMON,SIFIVE
+; RUN: opt -p loop-unroll -mtriple=riscv64 -mcpu=sifive-p870 -S %s | FileCheck %s --check-prefixes=COMMON,SIFIVE
define void @reverse(ptr %dst, ptr %src, i64 %len) {
; CHECK-LABEL: define void @reverse(
@@ -248,7 +248,7 @@ define void @saxpy_tripcount1K_av0(ptr %dst, ptr %src, float %a) {
; SIFIVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; SIFIVE-NEXT: br label %[[VECTOR_BODY:.*]]
; SIFIVE: [[VECTOR_BODY]]:
-; SIFIVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; SIFIVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT_15:%.*]], %[[VECTOR_BODY]] ]
; SIFIVE-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX]]
; SIFIVE-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
; SIFIVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX]]
@@ -276,9 +276,93 @@ define void @saxpy_tripcount1K_av0(ptr %dst, ptr %src, float %a) {
; SIFIVE-NEXT: [[WIDE_LOAD12_3:%.*]] = load <4 x float>, ptr [[TMP10]], align 4
; SIFIVE-NEXT: [[TMP11:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_3]], <4 x float> [[WIDE_LOAD12_3]])
; SIFIVE-NEXT: store <4 x float> [[TMP11]], ptr [[TMP10]], align 4
-; SIFIVE-NEXT: [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 16
-; SIFIVE-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; SIFIVE-NEXT: br i1 [[TMP3]], label %[[EXIT:.*]], label %[[VECTOR_BODY]]
+; SIFIVE-NEXT: [[INDEX_NEXT:%.*]] = add nuw nsw i64 [[INDEX]], 16
+; SIFIVE-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT]]
+; SIFIVE-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x float>, ptr [[TMP49]], align 4
+; SIFIVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_4:%.*]] = load <4 x float>, ptr [[TMP13]], align 4
+; SIFIVE-NEXT: [[TMP14:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_4]], <4 x float> [[WIDE_LOAD12_4]])
+; SIFIVE-NEXT: store <4 x float> [[TMP14]], ptr [[TMP13]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_4:%.*]] = add nuw nsw i64 [[INDEX]], 20
+; SIFIVE-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_4]]
+; SIFIVE-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x float>, ptr [[TMP15]], align 4
+; SIFIVE-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_4]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_5:%.*]] = load <4 x float>, ptr [[TMP16]], align 4
+; SIFIVE-NEXT: [[TMP17:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_5]], <4 x float> [[WIDE_LOAD12_5]])
+; SIFIVE-NEXT: store <4 x float> [[TMP17]], ptr [[TMP16]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_5:%.*]] = add nuw nsw i64 [[INDEX]], 24
+; SIFIVE-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_5]]
+; SIFIVE-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x float>, ptr [[TMP18]], align 4
+; SIFIVE-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_5]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_6:%.*]] = load <4 x float>, ptr [[TMP19]], align 4
+; SIFIVE-NEXT: [[TMP20:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_6]], <4 x float> [[WIDE_LOAD12_6]])
+; SIFIVE-NEXT: store <4 x float> [[TMP20]], ptr [[TMP19]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_6:%.*]] = add nuw nsw i64 [[INDEX]], 28
+; SIFIVE-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_6]]
+; SIFIVE-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x float>, ptr [[TMP21]], align 4
+; SIFIVE-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_6]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_7:%.*]] = load <4 x float>, ptr [[TMP22]], align 4
+; SIFIVE-NEXT: [[TMP23:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_7]], <4 x float> [[WIDE_LOAD12_7]])
+; SIFIVE-NEXT: store <4 x float> [[TMP23]], ptr [[TMP22]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_7:%.*]] = add nuw nsw i64 [[INDEX]], 32
+; SIFIVE-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_7]]
+; SIFIVE-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x float>, ptr [[TMP24]], align 4
+; SIFIVE-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_7]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_8:%.*]] = load <4 x float>, ptr [[TMP25]], align 4
+; SIFIVE-NEXT: [[TMP26:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_8]], <4 x float> [[WIDE_LOAD12_8]])
+; SIFIVE-NEXT: store <4 x float> [[TMP26]], ptr [[TMP25]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_8:%.*]] = add nuw nsw i64 [[INDEX]], 36
+; SIFIVE-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_8]]
+; SIFIVE-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x float>, ptr [[TMP27]], align 4
+; SIFIVE-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_8]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_9:%.*]] = load <4 x float>, ptr [[TMP28]], align 4
+; SIFIVE-NEXT: [[TMP29:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_9]], <4 x float> [[WIDE_LOAD12_9]])
+; SIFIVE-NEXT: store <4 x float> [[TMP29]], ptr [[TMP28]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_9:%.*]] = add nuw nsw i64 [[INDEX]], 40
+; SIFIVE-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_9]]
+; SIFIVE-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x float>, ptr [[TMP30]], align 4
+; SIFIVE-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_9]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_10:%.*]] = load <4 x float>, ptr [[TMP31]], align 4
+; SIFIVE-NEXT: [[TMP32:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_10]], <4 x float> [[WIDE_LOAD12_10]])
+; SIFIVE-NEXT: store <4 x float> [[TMP32]], ptr [[TMP31]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_10:%.*]] = add nuw nsw i64 [[INDEX]], 44
+; SIFIVE-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_10]]
+; SIFIVE-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x float>, ptr [[TMP33]], align 4
+; SIFIVE-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_10]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_11:%.*]] = load <4 x float>, ptr [[TMP34]], align 4
+; SIFIVE-NEXT: [[TMP35:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_11]], <4 x float> [[WIDE_LOAD12_11]])
+; SIFIVE-NEXT: store <4 x float> [[TMP35]], ptr [[TMP34]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_11:%.*]] = add nuw nsw i64 [[INDEX]], 48
+; SIFIVE-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_11]]
+; SIFIVE-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x float>, ptr [[TMP36]], align 4
+; SIFIVE-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_11]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_12:%.*]] = load <4 x float>, ptr [[TMP37]], align 4
+; SIFIVE-NEXT: [[TMP38:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_12]], <4 x float> [[WIDE_LOAD12_12]])
+; SIFIVE-NEXT: store <4 x float> [[TMP38]], ptr [[TMP37]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_12:%.*]] = add nuw nsw i64 [[INDEX]], 52
+; SIFIVE-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_12]]
+; SIFIVE-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x float>, ptr [[TMP39]], align 4
+; SIFIVE-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_12]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_13:%.*]] = load <4 x float>, ptr [[TMP40]], align 4
+; SIFIVE-NEXT: [[TMP41:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_13]], <4 x float> [[WIDE_LOAD12_13]])
+; SIFIVE-NEXT: store <4 x float> [[TMP41]], ptr [[TMP40]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_13:%.*]] = add nuw nsw i64 [[INDEX]], 56
+; SIFIVE-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_13]]
+; SIFIVE-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x float>, ptr [[TMP42]], align 4
+; SIFIVE-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_13]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_14:%.*]] = load <4 x float>, ptr [[TMP43]], align 4
+; SIFIVE-NEXT: [[TMP44:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_14]], <4 x float> [[WIDE_LOAD12_14]])
+; SIFIVE-NEXT: store <4 x float> [[TMP44]], ptr [[TMP43]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_14:%.*]] = add nuw nsw i64 [[INDEX]], 60
+; SIFIVE-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX_NEXT_14]]
+; SIFIVE-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x float>, ptr [[TMP45]], align 4
+; SIFIVE-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX_NEXT_14]]
+; SIFIVE-NEXT: [[WIDE_LOAD12_15:%.*]] = load <4 x float>, ptr [[TMP46]], align 4
+; SIFIVE-NEXT: [[TMP47:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[WIDE_LOAD_15]], <4 x float> [[WIDE_LOAD12_15]])
+; SIFIVE-NEXT: store <4 x float> [[TMP47]], ptr [[TMP46]], align 4
+; SIFIVE-NEXT: [[INDEX_NEXT_15]] = add nuw nsw i64 [[INDEX]], 64
+; SIFIVE-NEXT: [[TMP48:%.*]] = icmp eq i64 [[INDEX_NEXT_15]], 1024
+; SIFIVE-NEXT: br i1 [[TMP48]], label %[[EXIT:.*]], label %[[VECTOR_BODY]]
; SIFIVE: [[EXIT]]:
; SIFIVE-NEXT: ret void
;
@@ -345,8 +429,6 @@ vector.body: ; preds = %vector.body, %entry
exit: ; preds = %vector.body
ret void
}
-!0 = !{!0, !1}
-!1 = !{!"llvm.loop.isvectorized", i32 1}
; On SiFive we should runtime unroll the scalar epilogue loop, but not the
; vector loop.
@@ -587,6 +669,46 @@ exit:
ret void
}
+define void @vector_operands(ptr %p, i64 %n) {
+; COMMON-LABEL: define void @vector_operands(
+; COMMON-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; COMMON-NEXT: [[ENTRY:.*]]:
+; COMMON-NEXT: br label %[[VECTOR_BODY:.*]]
+; COMMON: [[VECTOR_BODY]]:
+; COMMON-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; COMMON-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[ENTRY]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; COMMON-NEXT: [[VL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; COMMON-NEXT: [[ADDR:%.*]] = getelementptr i64, ptr [[P]], i64 [[EVL_BASED_IV]]
+; COMMON-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[ADDR]], <vscale x 2 x i1> splat (i1 true), i32 [[VL]])
+; COMMON-NEXT: [[VL_ZEXT:%.*]] = zext i32 [[VL]] to i64
+; COMMON-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[VL_ZEXT]], [[EVL_BASED_IV]]
+; COMMON-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[VL_ZEXT]]
+; COMMON-NEXT: [[TMP0:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
+; COMMON-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
+; COMMON: [[EXIT]]:
+; COMMON-NEXT: ret void
+;
+entry:
+ br label %vector.body
+
+vector.body:
+ %evl.based.iv = phi i64 [ 0, %entry ], [ %index.evl.next, %vector.body ]
+ %avl = phi i64 [ %n, %entry ], [ %avl.next, %vector.body ]
+ %vl = call i32 @llvm.experimental.get.vector.length.i64(i64 %avl, i32 2, i1 true)
+ %addr = getelementptr i64, ptr %p, i64 %evl.based.iv
+ call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> splat (i64 0), ptr align 8 %addr, <vscale x 2 x i1> splat (i1 true), i32 %vl)
+ %vl.zext = zext i32 %vl to i64
+ %index.evl.next = add nuw i64 %vl.zext, %evl.based.iv
+ %avl.next = sub nuw i64 %avl, %vl.zext
+ %3 = icmp eq i64 %avl.next, 0
+ br i1 %3, label %exit, label %vector.body, !llvm.loop !2
+
+exit:
+ ret void
+}
+
+!0 = !{!0, !1}
+!1 = !{!"llvm.loop.isvectorized", i32 1}
!2 = distinct !{!2, !1}
!3 = distinct !{!3, !1}
``````````
</details>
https://github.com/llvm/llvm-project/pull/171089