[llvm] [AArch64] Lower alias mask to a whilewr (PR #100769)
Sam Tebbs via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 31 06:35:46 PDT 2024
================
@@ -0,0 +1,127 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc %s -mtriple=aarch64-linux-gnu -O3 -mattr=+sve2 -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-linux-gnu -O3 -mattr=+sve -o - | FileCheck %s --check-prefix=CHECK-NOSVE2
+; whilewr_8: alias mask for 1-byte elements (result type <vscale x 16 x i1>).
+; The body ORs llvm.get.active.lane.mask(0, %b - %c) with a splat of
+; (%b - %c) < 0. The +sve2 run expects this to become a single
+; "whilewr p0.b, x1, x2"; the +sve-only run expects the expanded
+; sub/whilelo/sel sequence. %a and %n are not referenced by the body.
+define dso_local <vscale x 16 x i1> @whilewr_8(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
+; CHECK-LABEL: whilewr_8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: whilewr p0.b, x1, x2
+; CHECK-NEXT: ret
+;
+; CHECK-NOSVE2-LABEL: whilewr_8:
+; CHECK-NOSVE2: // %bb.0: // %entry
+; CHECK-NOSVE2-NEXT: sub x8, x1, x2
+; CHECK-NOSVE2-NEXT: cmp x8, #0
+; CHECK-NOSVE2-NEXT: cset w9, lt
+; CHECK-NOSVE2-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NOSVE2-NEXT: sbfx x8, x9, #0, #1
+; CHECK-NOSVE2-NEXT: whilelo p1.b, xzr, x8
+; CHECK-NOSVE2-NEXT: sel p0.b, p0, p0.b, p1.b
+; CHECK-NOSVE2-NEXT: ret
+entry:
+ %c14 = ptrtoint ptr %c to i64
+ %b15 = ptrtoint ptr %b to i64
+; Byte distance %b - %c; with 1-byte elements no division is needed.
+ %sub.diff = sub i64 %b15, %c14
+; Negative distance: the i1 is splatted below so that it sets every lane.
+ %neg.compare = icmp slt i64 %sub.diff, 0
+ %.splatinsert = insertelement <vscale x 16 x i1> poison, i1 %neg.compare, i64 0
+ %.splat = shufflevector <vscale x 16 x i1> %.splatinsert, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %ptr.diff.lane.mask = tail call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 %sub.diff)
+; Final mask: the lane mask, or all lanes when %neg.compare is true.
+ %active.lane.mask.alias = or <vscale x 16 x i1> %ptr.diff.lane.mask, %.splat
+ ret <vscale x 16 x i1> %active.lane.mask.alias
+}
+
+; whilewr_16: alias mask for 2-byte elements (result type <vscale x 8 x i1>).
+; The byte distance %b - %c is divided by 2 before being passed to
+; llvm.get.active.lane.mask, and the result is ORed with a splat of
+; (%b - %c) < -1. The +sve2 run expects a single "whilewr p0.h, x1, x2";
+; the +sve-only run expects the expanded sequence. %a and %n are not
+; referenced by the body.
+define dso_local <vscale x 8 x i1> @whilewr_16(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
+; CHECK-LABEL: whilewr_16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: whilewr p0.h, x1, x2
+; CHECK-NEXT: ret
+;
+; CHECK-NOSVE2-LABEL: whilewr_16:
+; CHECK-NOSVE2: // %bb.0: // %entry
+; CHECK-NOSVE2-NEXT: sub x8, x1, x2
+; CHECK-NOSVE2-NEXT: cmn x8, #1
+; CHECK-NOSVE2-NEXT: add x8, x8, x8, lsr #63
+; CHECK-NOSVE2-NEXT: cset w9, lt
+; CHECK-NOSVE2-NEXT: sbfx x9, x9, #0, #1
+; CHECK-NOSVE2-NEXT: asr x8, x8, #1
+; CHECK-NOSVE2-NEXT: whilelo p0.h, xzr, x9
+; CHECK-NOSVE2-NEXT: whilelo p1.h, xzr, x8
+; CHECK-NOSVE2-NEXT: mov p0.b, p1/m, p1.b
+; CHECK-NOSVE2-NEXT: ret
+entry:
+ %b14 = ptrtoint ptr %b to i64
+ %c15 = ptrtoint ptr %c to i64
+ %sub.diff = sub i64 %b14, %c15
+; Distance in 2-byte elements; sdiv rounds toward zero.
+ %diff = sdiv i64 %sub.diff, 2
+; %sub.diff < -1 holds exactly when %diff above is negative.
+ %neg.compare = icmp slt i64 %sub.diff, -1
+ %.splatinsert = insertelement <vscale x 8 x i1> poison, i1 %neg.compare, i64 0
+ %.splat = shufflevector <vscale x 8 x i1> %.splatinsert, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %ptr.diff.lane.mask = tail call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 %diff)
+; Final mask: the lane mask, or all lanes when %neg.compare is true.
+ %active.lane.mask.alias = or <vscale x 8 x i1> %ptr.diff.lane.mask, %.splat
+ ret <vscale x 8 x i1> %active.lane.mask.alias
+}
+
+; whilewr_32: alias mask for 4-byte elements (result type <vscale x 4 x i1>).
+; The byte distance %b - %c is divided by 4 before being passed to
+; llvm.get.active.lane.mask, and the result is ORed with a splat of
+; (%b - %c) < -3. The +sve2 run expects a single "whilewr p0.s, x1, x2";
+; the +sve-only run expects the expanded sequence. %a and %n are not
+; referenced by the body.
+define dso_local <vscale x 4 x i1> @whilewr_32(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
+; CHECK-LABEL: whilewr_32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: whilewr p0.s, x1, x2
+; CHECK-NEXT: ret
+;
+; CHECK-NOSVE2-LABEL: whilewr_32:
+; CHECK-NOSVE2: // %bb.0: // %entry
+; CHECK-NOSVE2-NEXT: sub x8, x1, x2
+; CHECK-NOSVE2-NEXT: add x9, x8, #3
+; CHECK-NOSVE2-NEXT: cmp x8, #0
+; CHECK-NOSVE2-NEXT: csel x9, x9, x8, lt
+; CHECK-NOSVE2-NEXT: cmn x8, #3
+; CHECK-NOSVE2-NEXT: cset w8, lt
+; CHECK-NOSVE2-NEXT: asr x9, x9, #2
+; CHECK-NOSVE2-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NOSVE2-NEXT: whilelo p1.s, xzr, x9
+; CHECK-NOSVE2-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NOSVE2-NEXT: mov p0.b, p1/m, p1.b
+; CHECK-NOSVE2-NEXT: ret
+entry:
+ %b12 = ptrtoint ptr %b to i64
+ %c13 = ptrtoint ptr %c to i64
+ %sub.diff = sub i64 %b12, %c13
+; Distance in 4-byte elements; sdiv rounds toward zero.
+ %diff = sdiv i64 %sub.diff, 4
+; %sub.diff < -3 holds exactly when %diff above is negative.
+ %neg.compare = icmp slt i64 %sub.diff, -3
+ %.splatinsert = insertelement <vscale x 4 x i1> poison, i1 %neg.compare, i64 0
+ %.splat = shufflevector <vscale x 4 x i1> %.splatinsert, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %ptr.diff.lane.mask = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 %diff)
+; Final mask: the lane mask, or all lanes when %neg.compare is true.
+ %active.lane.mask.alias = or <vscale x 4 x i1> %ptr.diff.lane.mask, %.splat
+ ret <vscale x 4 x i1> %active.lane.mask.alias
+}
+
+define dso_local <vscale x 2 x i1> @whilewr_64(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
----------------
SamTebbs33 wrote:
Done.
https://github.com/llvm/llvm-project/pull/100769
More information about the llvm-commits
mailing list