[llvm] [AArch64] Define cost of i16->i32 udot/sdot instructions (PR #174102)
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 5 04:51:53 PST 2026
================
@@ -0,0 +1,37 @@
+; REQUIRES: asserts
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=8 \
+; RUN: -enable-epilogue-vectorization=false -debug-only=loop-vectorize \
+; RUN: -mattr=+sve2p1 -scalable-vectorization=off \
+; RUN: -disable-output < %s 2>&1 | FileCheck %s --check-prefix=CHECK-FIXED
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=8 \
+; RUN: -enable-epilogue-vectorization=false -debug-only=loop-vectorize \
+; RUN: -mattr=+sve2p1 -scalable-vectorization=on \
+; RUN: -disable-output < %s 2>&1 | FileCheck %s --check-prefix=CHECK-SCALABLE
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=8 \
+; RUN: -enable-epilogue-vectorization=false -debug-only=loop-vectorize \
+; RUN: -mattr=+sve2,+sme2 -scalable-vectorization=on \
+; RUN: -disable-output < %s 2>&1 | FileCheck %s --check-prefix=CHECK-SCALABLE
+
+; CHECK-FIXED: Cost of 1 for VF 8: EXPRESSION vp<%8> = ir<%acc> + partial.reduce.add (ir<%load> sext to i32)
+; CHECK-SCALABLE: Cost of 1 for VF vscale x 8: EXPRESSION vp<%8> = ir<%acc> + partial.reduce.add (ir<%load> sext to i32)
+
+target triple = "aarch64"
+
+define i32 @sext_reduction_i16_to_i32(ptr %arr, i32 %n) vscale_range(1,16) {
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %acc = phi i32 [ 0, %entry ], [ %add, %loop ]
+ %gep = getelementptr inbounds i16, ptr %arr, i32 %iv
+ %load = load i16, ptr %gep
+ %sext = sext i16 %load to i32
----------------
david-arm wrote:
Is it worth having a test for udot too (i.e. zext)?
https://github.com/llvm/llvm-project/pull/174102
More information about the llvm-commits mailing list