[llvm] [IR][LangRef] Add partial reduction add intrinsic (PR #94499)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 6 02:01:17 PDT 2024
================
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -force-vector-interleave=1 %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-none-unknown-elf"
+
+define void @partial_reduce_add(<vscale x 16 x i8> %wide.load.pre, <vscale x 16 x i32> %0, <vscale x 16 x i32> %1, i64 %index) #0 {
+; CHECK-LABEL: partial_reduce_add:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: index z2.s, #0, #1
+; CHECK-NEXT: mov z4.s, w8
+; CHECK-NEXT: mov w8, #2 // =0x2
+; CHECK-NEXT: ptrue p2.s, vl1
+; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1w { z5.s }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT: mov z6.s, w8
+; CHECK-NEXT: cmpeq p1.s, p0/z, z2.s, z4.s
+; CHECK-NEXT: uaddv d3, p0, z0.s
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: uaddv d7, p0, z1.s
+; CHECK-NEXT: uaddv d4, p0, z5.s
+; CHECK-NEXT: mov z1.d, z0.d
+; CHECK-NEXT: fmov x8, d3
+; CHECK-NEXT: ld1w { z3.s }, p0/z, [x0, #3, mul vl]
+; CHECK-NEXT: mov z1.s, p2/m, w8
+; CHECK-NEXT: mov w8, #3 // =0x3
+; CHECK-NEXT: cmpeq p2.s, p0/z, z2.s, z6.s
+; CHECK-NEXT: mov z5.s, w8
+; CHECK-NEXT: fmov x8, d7
+; CHECK-NEXT: uaddv d3, p0, z3.s
+; CHECK-NEXT: mov z1.s, p1/m, w8
+; CHECK-NEXT: fmov x8, d4
+; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z5.s
+; CHECK-NEXT: mov z1.s, p2/m, w8
+; CHECK-NEXT: fmov x8, d3
+; CHECK-NEXT: mov z1.s, p0/m, w8
+; CHECK-NEXT: addvl x8, x1, #1
+; CHECK-NEXT: .LBB0_1: // %vector.body
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: orr z0.d, z1.d, z0.d
+; CHECK-NEXT: cbnz x8, .LBB0_1
+; CHECK-NEXT: // %bb.2: // %middle.block
+; CHECK-NEXT: ret
+entry:
+ %2 = call i64 @llvm.vscale.i64()
+ %3 = mul i64 %2, 16
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %entry
----------------
davemgreen wrote:
The test doesn't need a loop.
https://github.com/llvm/llvm-project/pull/94499
More information about the llvm-commits mailing list