[llvm] [IR] Reject invalid partial.reduce intrinsics (PR #161831)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 3 05:12:40 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-ir
Author: Sander de Smalen (sdesmalen-arm)
<details>
<summary>Changes</summary>
The requirement that element types of the accumulator and input vector must match was previously untested. This patch also improves the error message by printing the call instruction.
---
Full diff: https://github.com/llvm/llvm-project/pull/161831.diff
3 Files Affected:
- (modified) llvm/lib/IR/Verifier.cpp (+8-5)
- (modified) llvm/test/CodeGen/AArch64/complex-deinterleaving-cdot.ll (-106)
- (added) llvm/test/Verifier/partial-reduce.ll (+19)
``````````diff
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 6b3cd27b77a7a..b6acba48517c5 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -6588,14 +6588,17 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
VectorType *AccTy = cast<VectorType>(Call.getArgOperand(0)->getType());
VectorType *VecTy = cast<VectorType>(Call.getArgOperand(1)->getType());
- unsigned VecWidth = VecTy->getElementCount().getKnownMinValue();
- unsigned AccWidth = AccTy->getElementCount().getKnownMinValue();
+ ElementCount VecWidth = VecTy->getElementCount();
+ ElementCount AccWidth = AccTy->getElementCount();
- Check((VecWidth % AccWidth) == 0,
+ Check(VecWidth.hasKnownScalarFactor(AccWidth),
"Invalid vector widths for partial "
"reduction. The width of the input vector "
- "must be a positive integer multiple of "
- "the width of the accumulator vector.");
+ "must be a known integer multiple of "
+ "the width of the accumulator vector.", &Call);
+
+ Check(AccTy->getElementType() == VecTy->getElementType(),
+ "Elements type of accumulator and input type must match", &Call);
break;
}
case Intrinsic::experimental_noalias_scope_decl: {
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-cdot.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-cdot.ll
index ebb2da9a3edd2..5385b2392dde7 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-cdot.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-cdot.ll
@@ -913,112 +913,6 @@ middle.block: ; preds = %vector.body
ret i32 %0
}
-define i16 @invalid_type(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b) {
-; CHECK-SVE2-LABEL: define i16 @invalid_type(
-; CHECK-SVE2-SAME: <vscale x 32 x i8> [[A:%.*]], <vscale x 32 x i8> [[B:%.*]]) #[[ATTR0]] {
-; CHECK-SVE2-NEXT: [[ENTRY:.*]]:
-; CHECK-SVE2-NEXT: br label %[[VECTOR_BODY:.*]]
-; CHECK-SVE2: [[VECTOR_BODY]]:
-; CHECK-SVE2-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x i16> [ zeroinitializer, %[[ENTRY]] ], [ [[PARTIAL_REDUCE_SUB:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-SVE2-NEXT: [[A_DEINTERLEAVED:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> [[A]])
-; CHECK-SVE2-NEXT: [[B_DEINTERLEAVED:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> [[B]])
-; CHECK-SVE2-NEXT: [[A_REAL:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[A_DEINTERLEAVED]], 0
-; CHECK-SVE2-NEXT: [[A_IMAG:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[A_DEINTERLEAVED]], 1
-; CHECK-SVE2-NEXT: [[B_REAL:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[B_DEINTERLEAVED]], 0
-; CHECK-SVE2-NEXT: [[B_IMAG:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[B_DEINTERLEAVED]], 1
-; CHECK-SVE2-NEXT: [[A_REAL_EXT:%.*]] = sext <vscale x 16 x i8> [[A_REAL]] to <vscale x 16 x i32>
-; CHECK-SVE2-NEXT: [[A_IMAG_EXT:%.*]] = sext <vscale x 16 x i8> [[A_IMAG]] to <vscale x 16 x i32>
-; CHECK-SVE2-NEXT: [[B_REAL_EXT:%.*]] = sext <vscale x 16 x i8> [[B_REAL]] to <vscale x 16 x i32>
-; CHECK-SVE2-NEXT: [[B_IMAG_EXT:%.*]] = sext <vscale x 16 x i8> [[B_IMAG]] to <vscale x 16 x i32>
-; CHECK-SVE2-NEXT: [[REAL_MUL:%.*]] = mul <vscale x 16 x i32> [[B_REAL_EXT]], [[A_REAL_EXT]]
-; CHECK-SVE2-NEXT: [[REAL_MUL_REDUCED:%.*]] = call <vscale x 8 x i16> @llvm.vector.partial.reduce.add.nxv8i16.nxv16i32(<vscale x 8 x i16> [[VEC_PHI]], <vscale x 16 x i32> [[REAL_MUL]])
-; CHECK-SVE2-NEXT: [[IMAG_MUL:%.*]] = mul <vscale x 16 x i32> [[B_IMAG_EXT]], [[A_IMAG_EXT]]
-; CHECK-SVE2-NEXT: [[IMAG_MUL_NEG:%.*]] = sub <vscale x 16 x i32> zeroinitializer, [[IMAG_MUL]]
-; CHECK-SVE2-NEXT: [[PARTIAL_REDUCE_SUB]] = call <vscale x 8 x i16> @llvm.vector.partial.reduce.add.nxv8i16.nxv16i32(<vscale x 8 x i16> [[REAL_MUL_REDUCED]], <vscale x 16 x i32> [[IMAG_MUL_NEG]])
-; CHECK-SVE2-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]]
-; CHECK-SVE2: [[MIDDLE_BLOCK]]:
-; CHECK-SVE2-NEXT: [[TMP0:%.*]] = call i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16> [[PARTIAL_REDUCE_SUB]])
-; CHECK-SVE2-NEXT: ret i16 [[TMP0]]
-;
-; CHECK-SVE-LABEL: define i16 @invalid_type(
-; CHECK-SVE-SAME: <vscale x 32 x i8> [[A:%.*]], <vscale x 32 x i8> [[B:%.*]]) #[[ATTR0]] {
-; CHECK-SVE-NEXT: [[ENTRY:.*]]:
-; CHECK-SVE-NEXT: br label %[[VECTOR_BODY:.*]]
-; CHECK-SVE: [[VECTOR_BODY]]:
-; CHECK-SVE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x i16> [ zeroinitializer, %[[ENTRY]] ], [ [[PARTIAL_REDUCE_SUB:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-SVE-NEXT: [[A_DEINTERLEAVED:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> [[A]])
-; CHECK-SVE-NEXT: [[B_DEINTERLEAVED:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> [[B]])
-; CHECK-SVE-NEXT: [[A_REAL:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[A_DEINTERLEAVED]], 0
-; CHECK-SVE-NEXT: [[A_IMAG:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[A_DEINTERLEAVED]], 1
-; CHECK-SVE-NEXT: [[B_REAL:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[B_DEINTERLEAVED]], 0
-; CHECK-SVE-NEXT: [[B_IMAG:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[B_DEINTERLEAVED]], 1
-; CHECK-SVE-NEXT: [[A_REAL_EXT:%.*]] = sext <vscale x 16 x i8> [[A_REAL]] to <vscale x 16 x i32>
-; CHECK-SVE-NEXT: [[A_IMAG_EXT:%.*]] = sext <vscale x 16 x i8> [[A_IMAG]] to <vscale x 16 x i32>
-; CHECK-SVE-NEXT: [[B_REAL_EXT:%.*]] = sext <vscale x 16 x i8> [[B_REAL]] to <vscale x 16 x i32>
-; CHECK-SVE-NEXT: [[B_IMAG_EXT:%.*]] = sext <vscale x 16 x i8> [[B_IMAG]] to <vscale x 16 x i32>
-; CHECK-SVE-NEXT: [[REAL_MUL:%.*]] = mul <vscale x 16 x i32> [[B_REAL_EXT]], [[A_REAL_EXT]]
-; CHECK-SVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call <vscale x 8 x i16> @llvm.vector.partial.reduce.add.nxv8i16.nxv16i32(<vscale x 8 x i16> [[VEC_PHI]], <vscale x 16 x i32> [[REAL_MUL]])
-; CHECK-SVE-NEXT: [[IMAG_MUL:%.*]] = mul <vscale x 16 x i32> [[B_IMAG_EXT]], [[A_IMAG_EXT]]
-; CHECK-SVE-NEXT: [[IMAG_MUL_NEG:%.*]] = sub <vscale x 16 x i32> zeroinitializer, [[IMAG_MUL]]
-; CHECK-SVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call <vscale x 8 x i16> @llvm.vector.partial.reduce.add.nxv8i16.nxv16i32(<vscale x 8 x i16> [[REAL_MUL_REDUCED]], <vscale x 16 x i32> [[IMAG_MUL_NEG]])
-; CHECK-SVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]]
-; CHECK-SVE: [[MIDDLE_BLOCK]]:
-; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16> [[PARTIAL_REDUCE_SUB]])
-; CHECK-SVE-NEXT: ret i16 [[TMP0]]
-;
-; CHECK-NOSVE-LABEL: define i16 @invalid_type(
-; CHECK-NOSVE-SAME: <vscale x 32 x i8> [[A:%.*]], <vscale x 32 x i8> [[B:%.*]]) {
-; CHECK-NOSVE-NEXT: [[ENTRY:.*]]:
-; CHECK-NOSVE-NEXT: br label %[[VECTOR_BODY:.*]]
-; CHECK-NOSVE: [[VECTOR_BODY]]:
-; CHECK-NOSVE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x i16> [ zeroinitializer, %[[ENTRY]] ], [ [[PARTIAL_REDUCE_SUB:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NOSVE-NEXT: [[A_DEINTERLEAVED:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> [[A]])
-; CHECK-NOSVE-NEXT: [[B_DEINTERLEAVED:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> [[B]])
-; CHECK-NOSVE-NEXT: [[A_REAL:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[A_DEINTERLEAVED]], 0
-; CHECK-NOSVE-NEXT: [[A_IMAG:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[A_DEINTERLEAVED]], 1
-; CHECK-NOSVE-NEXT: [[B_REAL:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[B_DEINTERLEAVED]], 0
-; CHECK-NOSVE-NEXT: [[B_IMAG:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[B_DEINTERLEAVED]], 1
-; CHECK-NOSVE-NEXT: [[A_REAL_EXT:%.*]] = sext <vscale x 16 x i8> [[A_REAL]] to <vscale x 16 x i32>
-; CHECK-NOSVE-NEXT: [[A_IMAG_EXT:%.*]] = sext <vscale x 16 x i8> [[A_IMAG]] to <vscale x 16 x i32>
-; CHECK-NOSVE-NEXT: [[B_REAL_EXT:%.*]] = sext <vscale x 16 x i8> [[B_REAL]] to <vscale x 16 x i32>
-; CHECK-NOSVE-NEXT: [[B_IMAG_EXT:%.*]] = sext <vscale x 16 x i8> [[B_IMAG]] to <vscale x 16 x i32>
-; CHECK-NOSVE-NEXT: [[REAL_MUL:%.*]] = mul <vscale x 16 x i32> [[B_REAL_EXT]], [[A_REAL_EXT]]
-; CHECK-NOSVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call <vscale x 8 x i16> @llvm.vector.partial.reduce.add.nxv8i16.nxv16i32(<vscale x 8 x i16> [[VEC_PHI]], <vscale x 16 x i32> [[REAL_MUL]])
-; CHECK-NOSVE-NEXT: [[IMAG_MUL:%.*]] = mul <vscale x 16 x i32> [[B_IMAG_EXT]], [[A_IMAG_EXT]]
-; CHECK-NOSVE-NEXT: [[IMAG_MUL_NEG:%.*]] = sub <vscale x 16 x i32> zeroinitializer, [[IMAG_MUL]]
-; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call <vscale x 8 x i16> @llvm.vector.partial.reduce.add.nxv8i16.nxv16i32(<vscale x 8 x i16> [[REAL_MUL_REDUCED]], <vscale x 16 x i32> [[IMAG_MUL_NEG]])
-; CHECK-NOSVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]]
-; CHECK-NOSVE: [[MIDDLE_BLOCK]]:
-; CHECK-NOSVE-NEXT: [[TMP0:%.*]] = call i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16> [[PARTIAL_REDUCE_SUB]])
-; CHECK-NOSVE-NEXT: ret i16 [[TMP0]]
-;
-entry:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %entry
- %vec.phi = phi <vscale x 8 x i16> [ zeroinitializer, %entry ], [ %partial.reduce.sub, %vector.body ]
- %a.deinterleaved = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.v32i8(<vscale x 32 x i8> %a)
- %b.deinterleaved = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.v32i8(<vscale x 32 x i8> %b)
- %a.real = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %a.deinterleaved, 0
- %a.imag = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %a.deinterleaved, 1
- %b.real = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %b.deinterleaved, 0
- %b.imag = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %b.deinterleaved, 1
- %a.real.ext = sext <vscale x 16 x i8> %a.real to <vscale x 16 x i32>
- %a.imag.ext = sext <vscale x 16 x i8> %a.imag to <vscale x 16 x i32>
- %b.real.ext = sext <vscale x 16 x i8> %b.real to <vscale x 16 x i32>
- %b.imag.ext = sext <vscale x 16 x i8> %b.imag to <vscale x 16 x i32>
- %real.mul = mul <vscale x 16 x i32> %b.real.ext, %a.real.ext
- %real.mul.reduced = call <vscale x 8 x i16> @llvm.vector.partial.reduce.add.nxv8i16.nxv16i32(<vscale x 8 x i16> %vec.phi, <vscale x 16 x i32> %real.mul)
- %imag.mul = mul <vscale x 16 x i32> %b.imag.ext, %a.imag.ext
- %imag.mul.neg = sub <vscale x 16 x i32> zeroinitializer, %imag.mul
- %partial.reduce.sub = call <vscale x 8 x i16> @llvm.vector.partial.reduce.add.nxv8i16.nxv16i32(<vscale x 8 x i16> %real.mul.reduced, <vscale x 16 x i32> %imag.mul.neg)
- br i1 true, label %middle.block, label %vector.body
-
-middle.block: ; preds = %vector.body
- %0 = call i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16> %partial.reduce.sub)
- ret i16 %0
-}
-
define i32 @not_cdotp_i8_rot0_fixed_length(<32 x i8> %a, <32 x i8> %b) {
; CHECK-SVE2-LABEL: define i32 @not_cdotp_i8_rot0_fixed_length(
; CHECK-SVE2-SAME: <32 x i8> [[A:%.*]], <32 x i8> [[B:%.*]]) #[[ATTR0]] {
diff --git a/llvm/test/Verifier/partial-reduce.ll b/llvm/test/Verifier/partial-reduce.ll
new file mode 100644
index 0000000000000..97cb6b6b21511
--- /dev/null
+++ b/llvm/test/Verifier/partial-reduce.ll
@@ -0,0 +1,19 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+define void @element_count_mismatch() {
+ ; CHECK: Invalid vector widths for partial reduction. The width of the input vector must be a known integer multiple of the width of the accumulator vector.
+ call <3 x i32> @llvm.vector.partial.reduce.add(<3 x i32> poison, <8 x i32> poison)
+
+ ; CHECK: Invalid vector widths for partial reduction. The width of the input vector must be a known integer multiple of the width of the accumulator vector.
+ call <vscale x 4 x i32> @llvm.vector.partial.reduce.add(<vscale x 4 x i32> poison, <8 x i32> poison)
+
+ ; CHECK: Invalid vector widths for partial reduction. The width of the input vector must be a known integer multiple of the width of the accumulator vector.
+ call <4 x i32> @llvm.vector.partial.reduce.add(<4 x i32> poison, <vscale x 8 x i32> poison)
+ ret void
+}
+
+define void @element_type_mismatch() {
+ ; CHECK: Elements type of accumulator and input type must match
+ call <4 x i32> @llvm.vector.partial.reduce.add(<4 x i32> poison, <8 x i8> poison)
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/161831
More information about the llvm-commits
mailing list