[llvm] [TTI][RISCV] Add cost modelling for intrinsic vp.load.ff (PR #160470)

Wed Sep 24 06:35:47 PDT 2025

================
@@ -836,74 +836,106 @@ define void @abs() {
   ret void
 }
 
-define void @load() {
+define void @load(ptr %src) {
 ; CHECK-LABEL: 'load'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t1 = load <2 x i8>, ptr undef, align 2
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.load.v4i8.p0(ptr undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t3 = load <4 x i8>, ptr undef, align 4
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.load.v8i8.p0(ptr undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t5 = load <8 x i8>, ptr undef, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.load.v16i8.p0(ptr undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t7 = load <16 x i8>, ptr undef, align 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t9 = load <2 x i64>, ptr undef, align 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t10 = call <4 x i64> @llvm.vp.load.v4i64.p0(ptr undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t12 = load <4 x i64>, ptr undef, align 32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t13 = call <8 x i64> @llvm.vp.load.v8i64.p0(ptr undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t14 = load <8 x i64>, ptr undef, align 64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t15 = call <16 x i64> @llvm.vp.load.v16i64.p0(ptr undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t16 = load <16 x i64>, ptr undef, align 128
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t17 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t18 = load <vscale x 2 x i8>, ptr undef, align 2
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t19 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t20 = load <vscale x 4 x i8>, ptr undef, align 4
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t21 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t22 = load <vscale x 8 x i8>, ptr undef, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t23 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t24 = load <vscale x 16 x i8>, ptr undef, align 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t25 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t26 = load <vscale x 2 x i64>, ptr undef, align 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t27 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64.p0(ptr undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t28 = load <vscale x 4 x i64>, ptr undef, align 32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t29 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64.p0(ptr undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t30 = load <vscale x 8 x i64>, ptr undef, align 64
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t31 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64.p0(ptr undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t32 = load <vscale x 16 x i64>, ptr undef, align 128
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %src, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t1 = load <2 x i8>, ptr %src, align 2
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t2 = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr align 1 %src, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t3 = call <4 x i8> @llvm.vp.load.v4i8.p0(ptr %src, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t4 = load <4 x i8>, ptr %src, align 4
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t5 = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr align 1 %src, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <8 x i8> @llvm.vp.load.v8i8.p0(ptr %src, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t7 = load <8 x i8>, ptr %src, align 8
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t8 = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr align 1 %src, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t9 = call <16 x i8> @llvm.vp.load.v16i8.p0(ptr %src, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t10 = load <16 x i8>, ptr %src, align 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t11 = call { <16 x i8>, i32 } @llvm.vp.load.ff.v16i8.p0(ptr align 1 %src, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t12 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %src, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t13 = load <2 x i64>, ptr %src, align 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t14 = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr align 8 %src, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t15 = call <4 x i64> @llvm.vp.load.v4i64.p0(ptr %src, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t16 = load <4 x i64>, ptr %src, align 32
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t17 = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr align 8 %src, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t18 = call <8 x i64> @llvm.vp.load.v8i64.p0(ptr %src, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t19 = load <8 x i64>, ptr %src, align 64
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t20 = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr align 8 %src, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t21 = call <16 x i64> @llvm.vp.load.v16i64.p0(ptr %src, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t22 = load <16 x i64>, ptr %src, align 128
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t23 = call { <16 x i64>, i32 } @llvm.vp.load.ff.v16i64.p0(ptr align 8 %src, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t24 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr %src, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t25 = load <vscale x 2 x i8>, ptr %src, align 2
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t26 = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr align 1 %src, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t27 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr %src, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t28 = load <vscale x 4 x i8>, ptr %src, align 4
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t29 = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr align 1 %src, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t30 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr %src, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t31 = load <vscale x 8 x i8>, ptr %src, align 8
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t32 = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr align 1 %src, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t33 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr %src, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t34 = load <vscale x 16 x i8>, ptr %src, align 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t35 = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr align 1 %src, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t36 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr %src, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t37 = load <vscale x 2 x i64>, ptr %src, align 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t38 = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr align 8 %src, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t39 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64.p0(ptr %src, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t40 = load <vscale x 4 x i64>, ptr %src, align 32
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t41 = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr align 8 %src, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t42 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64.p0(ptr %src, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t43 = load <vscale x 8 x i64>, ptr %src, align 64
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t44 = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr align 8 %src, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t45 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64.p0(ptr %src, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t46 = load <vscale x 16 x i64>, ptr %src, align 128
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t47 = call { <vscale x 16 x i64>, i32 } @llvm.vp.load.ff.nxv16i64.p0(ptr align 8 %src, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-  %t0 = call <2 x i8> @llvm.vp.load.v2i8(ptr undef, <2 x i1> undef, i32 undef)
-  %t1 = load <2 x i8>, ptr undef
-  %t2 = call <4 x i8> @llvm.vp.load.v4i8(ptr undef, <4 x i1> undef, i32 undef)
-  %t3 = load <4 x i8>, ptr undef
-  %t4 = call <8 x i8> @llvm.vp.load.v8i8(ptr undef, <8 x i1> undef, i32 undef)
-  %t5 = load <8 x i8>, ptr undef
-  %t6 = call <16 x i8> @llvm.vp.load.v16i8(ptr undef, <16 x i1> undef, i32 undef)
-  %t7 = load <16 x i8>, ptr undef
-  %t8 = call <2 x i64> @llvm.vp.load.v2i64(ptr undef, <2 x i1> undef, i32 undef)
-  %t9 = load <2 x i64>, ptr undef
-  %t10 = call <4 x i64> @llvm.vp.load.v4i64(ptr undef, <4 x i1> undef, i32 undef)
-  %t12 = load <4 x i64>, ptr undef
-  %t13 = call <8 x i64> @llvm.vp.load.v8i64(ptr undef, <8 x i1> undef, i32 undef)
-  %t14 = load <8 x i64>, ptr undef
-  %t15 = call <16 x i64> @llvm.vp.load.v16i64(ptr undef, <16 x i1> undef, i32 undef)
-  %t16 = load <16 x i64>, ptr undef
-  %t17 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8(ptr undef, <vscale x 2 x i1> undef, i32 undef)
-  %t18 = load <vscale x 2 x i8>, ptr undef
-  %t19 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8(ptr undef, <vscale x 4 x i1> undef, i32 undef)
-  %t20 = load <vscale x 4 x i8>, ptr undef
-  %t21 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8(ptr undef, <vscale x 8 x i1> undef, i32 undef)
-  %t22 = load <vscale x 8 x i8>, ptr undef
-  %t23 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8(ptr undef, <vscale x 16 x i1> undef, i32 undef)
-  %t24 = load <vscale x 16 x i8>, ptr undef
-  %t25 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64(ptr undef, <vscale x 2 x i1> undef, i32 undef)
-  %t26 = load <vscale x 2 x i64>, ptr undef
-  %t27 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64(ptr undef, <vscale x 4 x i1> undef, i32 undef)
-  %t28 = load <vscale x 4 x i64>, ptr undef
-  %t29 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64(ptr undef, <vscale x 8 x i1> undef, i32 undef)
-  %t30 = load <vscale x 8 x i64>, ptr undef
-  %t31 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64(ptr undef, <vscale x 16 x i1> undef, i32 undef)
-  %t32 = load <vscale x 16 x i64>, ptr undef
+  %t0 = call <2 x i8> @llvm.vp.load.v2i8(ptr %src, <2 x i1> undef, i32 undef)
+  %t1 = load <2 x i8>, ptr %src
----------------
lukel97 wrote:

Nit I don't think we need to test regular loads

https://github.com/llvm/llvm-project/pull/160470